To run: Beforehand:

module load pandoc

In R:

setwd("~/shared-gandalm/brain_CTP/Scripts/methylation/analysis/combine_methods/ctp_validation")
rmarkdown::render("ctp_validation_Jaffe2018_age0.Rmd", "html_document")

1 Set-up

1.1 Directories and libraries

library(tidyverse)
library(rmarkdown)    # You need this library to run this template.
library(epuRate)      # Install with remotes::install_github("holtzy/epuRate", force=TRUE) - use this instead of devtools::install_github()
library(data.table)
library(DT)
library(tidyverse)
library(dplyr)
library(reshape2)
library(uwot) # for umap
library(methylCC)
library(lme4)
library(compositions)
library(zCompositions)
library(mixOmics)
source("~/project-gandalm/software/ANCOM-v2.1/scripts/ancom_v2.1.R")
library(ALDEx2)
library(factoextra)
library(ggplot2)
library(RColorBrewer)
library(viridis)
library(ggpubr)
library(GGally)
library(lattice)
library(gridExtra)

# BiocManager::install("M3C")
# library(M3C)
#filen <- "Jaffe2018_age18"
filen <- "Jaffe2018_age0"
#filen <- "Jaffe2018_age18_ComBatplate_neg0"
out_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis"

# Cell-type number
cellnum <- 9
cellnum <- 7

write_out = TRUE

# Read in reference object
# - use Luo et al. reference with DMRs identified from 450K overlap 
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_methylcc_dmr_n200_p1e_6.rds"
# - use 450K dlpfc guintivano reference
# ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/dlpfc_450k_guintivano/dlpfc_450k_guintivano_aut_mask_methylcc_dmr_n200.rds"
# - use Luo et al. reference with DMRs pre-identified by Luo et al.
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_predmr_ilmn450k_celltype_na5_methylcc_dmr_n200_p1e_1.rds"
# - use extreme_methylation_sites.R output
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_extreme17_dmr_low0.3_high0.7.rds"
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_cell5_bonf001_contr50pc_top200.rds"

# - not the below
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_methylcc_dmr_n200.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_methylcc_dmr_ctpcollapse_glia_n200_p1e_6.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_methylcc_dmr_ctpcollapse_n200_p1e_6.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_methylcc_dmr_n100.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_methylcc_dmr_n50.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_ilmn450kepic_aggto100bp_cell7_bonf001_contr50pc_top200.rds"
#ref <- readRDS(ref_dir)
# - try the other reference matrix, which worked with Houseman
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/ilmn450k_celltype_na2.txt"
#ref <- read.delim(ref_dir, header = T, as.is = T)

# Pheno
pheno_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/processed/pheno_Jaffe2018.txt"

# methylCC
# - use Luo et al. reference with DMRs identified from 450K overlap 
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_methylcc_out_n200_p1e-6.rds", sep = ""))
# - use 450K dlpfc guintivano reference (+/- randomisation of sample order)
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_methylcc_out_n200_dlpfc_450k_guintivano_hpc.rds", sep = ""))
# - use Luo et al. reference with DMRs pre-identified by Luo et al.
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_predmr_ilmn450k_celltype_na5_methylcc_dmr_n200_p1e_1.rds", sep = ""))
# - use extreme_methylation_sites.R output
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_extreme17_dmr_low0.3_high0.7.rds", sep = ""))
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200.rds", sep = ""))
#filen0 <- "Jaffe2018_age18"
filen0 <- "Jaffe2018_age0"
#methylcc_dir <- paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200.rds", sep = "")
# methylcc_dir <- paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200_aggpostmcc.txt", sep = "")
#methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_chisq_dmr_ilmn450kepic_aggto100bp_cell7_bonf001_contr50pc_top200_aggpostmcc.txt", sep = "")

if (cellnum == 9) {
  methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell9_split6040all_aggpostmcc.txt", sep = "")
} else if (cellnum == 7) {
  methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell7_split6040all_aggpostmcc.txt", sep = "")
}

# methylCC with NeuN+/- data (Guintivano DLPFC)
methylcc_neun_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/", filen, "_aut_mask_methylcc_out_n200_dlpfc_450k_guintivano_hpc.rds", sep = "")

# eBayes + Houseman
houseman_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/Jaffe2018_age0_aut_mask_t_ref450K_toast_houseman_ls.rds"

# sequencing with extremes + Houseman
houseman_seq_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/Jaffe2018_age0_aut_mask_t_Luo2020_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell7_split6040all_houseman_ls.rds"

# celfie
celfie_dir <-  "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/celfie/Jaffe2018_age0_aut_mask_t_celfie.out/1_tissue_proportions.txt"
celfie_label_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/celfie/Jaffe2018_age0_aut_mask_t_celfie.in"

# Houseman0
#houseman0_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/processed/cellcounts_Jaffe2018_age18_plate.txt"

# smartSV
#smartsva_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/", filen, "_aut_mask_t_SmartSVA.rds", sep  = "")
# smartSV with ComBAT
#filen <- "Jaffe2018_age18_ComBatplate_neg0"
filen <- "Jaffe2018_age0_ComBatplate_neg0"
smartsva_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/Jaffe2018/analysis/", filen, "_aut_mask_t_SmartSVA.rds", sep  = "")
filen <- "Jaffe2018_age0"

vae_dir <- "~/shared-gandalm/brain_CTP/Data/integration/methylnet/combined/full_output_latent.csv"

# Jaffe CTP
jaffe_dir <- "~/shared-gandalm/GenomicDatasets/LIBD_phase2/methyl/Jaffe_methylation_DLPFC/pheno/GSE74193_series_matrix_pheno_df.txt" 

1.2 Read-in data

# Pheno
pheno <- read.delim(pheno_dir, header = T, as.is = T)
pheno$group <- ifelse(pheno$group == "Control", "Control", "SCZ")

# methylCC
methylcc_ctp <- read.delim(methylcc_dir)
ind <- grep("NonN", colnames(methylcc_ctp))
foo <- colnames(methylcc_ctp)[ind]
foo2 <- unlist(lapply(strsplit(gsub("NonN_", "", foo), "_"), function(x) x[1]))
colnames(methylcc_ctp)[ind] <- foo2
colnames(methylcc_ctp)[2:ncol(methylcc_ctp)] <- paste("mcc_", colnames(methylcc_ctp)[2:ncol(methylcc_ctp)], sep = "")
# methylcc <- readRDS(methylcc_dir)
# methylcc_ctp <- cell_counts(methylcc)
# ctp_sum <- methylcc@summary
# ctp_input <- ctp_sum$input
# colSums(ctp_input$zmat)
# # Would use this to mimic Supp Fig 1
# colSums(ref$zmat) # this is the initial starting proportions

# methylCC on Guintivano DLPFC reference (NeuN+/-)
# methylcc_neun <- readRDS(methylcc_neun_dir)
# methylcc_neun_ctp <- cell_counts(methylcc_neun)
# colnames(methylcc_neun_ctp) <- c("mcc_NeuN_neg", "mcc_NeuN_pos")
# ctp_neun_sum <- methylcc_neun@summary
# ctp_neun_input <- ctp_neun_sum$input
# colSums(ctp_neun_input$zmat)

# eBayes + Houseman
houseman.ls <- readRDS(houseman_dir)
# - coefs_sub
coefs_sub <- houseman.ls$coefs_sub
colnames(coefs_sub)[2:ncol(coefs_sub)] <- paste("h_", colnames(coefs_sub)[2:ncol(coefs_sub)], sep = "")
colnames(coefs_sub)[ncol(coefs_sub)] <- "h_error_sub"
coefs_brain <- houseman.ls$coefs_brain
colnames(coefs_brain)[2:ncol(coefs_brain)] <- paste("h_", colnames(coefs_brain)[2:ncol(coefs_brain)], sep = "")
colnames(coefs_brain)[ncol(coefs_brain)] <- "h_error_brain"

# sequencing + Houseman
houseman_seq.ls <- readRDS(houseman_seq_dir)
houseman_seq <- houseman_seq.ls[[1]]
ind <- grep("NonN", colnames(houseman_seq))
foo <- colnames(houseman_seq)[ind]
foo2 <- unlist(lapply(strsplit(gsub("NonN_", "", foo), "_"), function(x) x[1]))
colnames(houseman_seq)[ind] <- foo2
colnames(houseman_seq)[2:ncol(houseman_seq)] <- paste("hseq_", colnames(houseman_seq)[2:ncol(houseman_seq)], sep = "")

# CelFIE
celfie <- fread(celfie_dir)
colnames(celfie) <- c("IID_short", "celfie_Exc", "celfie_Inh", "celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC", "unknown1")
iid_short <- data.frame(IID = pheno$IID)
iid_short$IID_short <- unlist(lapply(strsplit(iid_short$IID, "_"), function(x) x[1]))
celfie <- inner_join(iid_short, celfie, by = "IID_short")
#celfie_label <- fread(celfie_label_dir)

# smartSVA
smartsva <- readRDS(smartsva_dir)
smartsva <- data.frame(IID_short = rownames(smartsva), smartsva)
smartsva <- inner_join(iid_short, smartsva, by = "IID_short")

if (ncol(smartsva) > 11) {
  smartsva <- smartsva[,c(1:11)]
}

# VAE, MethylNet
vae <- read.csv(vae_dir)
colnames(vae) <- c("IID", paste("VAEe", 0:9, sep = ""))

# Jaffe CTP
jaffe <- read.delim(jaffe_dir, header = T, as.is = T)
jaffe <- fread(jaffe_dir)
jaffe <- jaffe[,c("title", "brnum", "comp_da_neuron", "comp_es", "comp_neun_neg", "comp_neun_pos", "comp_npc")]
colnames(jaffe)[1] <- "IID"

1.2.1 Join into 1 dataframe

#ctp_pheno <- inner_join(data.frame(IID = rownames(methylcc_ctp), methylcc_ctp), pheno, by = "IID")
methylcc_ctp.tmp <- methylcc_ctp
methylcc_ctp.tmp$mcc_Glial <- rowSums(methylcc_ctp.tmp[,-c(grep("IID", colnames(methylcc_ctp.tmp)), grep("Exc", colnames(methylcc_ctp.tmp)), grep("Inh", colnames(methylcc_ctp.tmp)))])
methylcc_ctp.tmp$mcc_Neuron <- rowSums(methylcc_ctp.tmp[,c(grep("Exc", colnames(methylcc_ctp.tmp)), grep("Inh", colnames(methylcc_ctp.tmp)))])
ctp_pheno <- inner_join(methylcc_ctp.tmp, pheno, by = "IID")
#ctp_pheno <- inner_join(ctp_pheno, data.frame(IID = rownames(methylcc_neun_ctp), methylcc_neun_ctp), by = "IID")
ctp_pheno <- inner_join(ctp_pheno, smartsva, by = "IID")
ctp_pheno <- inner_join(ctp_pheno, jaffe, by = "IID")
#ctp_pheno <- inner_join(ctp_pheno, houseman0, by = "IID")

# Houseman with array reference
ctp_pheno <- inner_join(ctp_pheno, coefs_sub, by = "IID")
ctp_pheno <- inner_join(ctp_pheno, coefs_brain, by = "IID")
ctp_pheno$h_Glial <- rowSums(ctp_pheno[,c("h_ASTRO", "h_OPC", "h_OLIGO", "h_ENDO", "h_MONO")])
ctp_pheno$h_Neuron <- rowSums(ctp_pheno[,c("h_GABA", "h_GLU")])

# Houseman with sequencing reference
ctp_pheno <- inner_join(ctp_pheno, houseman_seq, by = "IID")
ctp_pheno$hseq_Glial <- rowSums(ctp_pheno[,c("hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC")])
ctp_pheno$hseq_Neuron <- rowSums(ctp_pheno[,c("hseq_Exc", "hseq_Inh")])

# CelFIE
ctp_pheno <- inner_join(ctp_pheno, celfie, by = c("IID", "IID_short"))
ctp_pheno$celfie_Glial <- rowSums(ctp_pheno[,c("celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC")])
ctp_pheno$celfie_Neuron <- rowSums(ctp_pheno[,c("celfie_Exc", "celfie_Inh")])

ctp_pheno <- inner_join(ctp_pheno, vae, by = "IID")

1.2.2 Write output

if (write_out == TRUE) {
  write.table(ctp_pheno, paste(out_dir, "/", filen, "_ctp_pheno.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

1.2.3 Set variables

keep_cols <- c("IID", "brnum", "group", "age", "sex", "race", "plate", "slide")
#ctp_cols_mcc <- c(colnames(ctp_pheno)[grep("mcc", colnames(ctp_pheno))])
#level2_cols_mcc <- ctp_cols_mcc[-which(ctp_cols_mcc %in% c("mcc_Glial", "mcc_Neuron"))]

ctp_cols <- c(colnames(ctp_pheno)[grep("hseq", colnames(ctp_pheno))])
ctp_cols <- ctp_cols[-which(ctp_cols == "hseq_error")]
level2_cols <- ctp_cols[-which(ctp_cols %in% c("hseq_Glial", "hseq_Neuron"))]

glial_rmoligo_n <- colnames(ctp_pheno)[grep("hseq", colnames(ctp_pheno))]
glial_rmoligo_n <- glial_rmoligo_n[-which(glial_rmoligo_n %in% c("hseq_Exc", "hseq_Inh", "hseq_Oligo", "hseq_error", "hseq_Glial", "hseq_Neuron"))]
neuron_n <- c("hseq_Exc", "hseq_Inh")

2 Check baseline variable differences

  • look for confounding
# Plate
plate <- ctp_pheno %>% group_by(group, plate) %>% summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
plate
## # A tibble: 5 × 3
##   plate      Control   SCZ
##   <chr>        <int> <int>
## 1 Lieber_104      48    NA
## 2 Lieber_244     135    79
## 3 Lieber_289      63    52
## 4 Lieber_30       NA    26
## 5 Lieber_315      43    29
#chisq.test(plate %>% dplyr::select("SCZ", "Control"))

# Race
race <- ctp_pheno %>% group_by(group, race) %>% summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
race
## # A tibble: 2 × 3
##   race  Control   SCZ
##   <chr>   <int> <int>
## 1 AA        148    84
## 2 CAUC      141   102
chisq.test(race %>% dplyr::select("SCZ", "Control"))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  race %>% dplyr::select("SCZ", "Control")
## X-squared = 1.4244, df = 1, p-value = 0.2327
# Sex
sex <- ctp_pheno %>% group_by(group, sex) %>% summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
sex
## # A tibble: 2 × 3
##   sex   Control   SCZ
##   <chr>   <int> <int>
## 1 F          87    71
## 2 M         202   115
chisq.test(sex %>% dplyr::select("SCZ", "Control"))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  sex %>% dplyr::select("SCZ", "Control")
## X-squared = 2.965, df = 1, p-value = 0.08508
# Age
summary(lm(age ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = age ~ group, data = ctp_pheno)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.387 -13.711   0.463  13.072  49.810 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   35.390      1.084  32.659   <2e-16 ***
## groupSCZ      14.753      1.732   8.519   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.42 on 473 degrees of freedom
## Multiple R-squared:  0.133,  Adjusted R-squared:  0.1312 
## F-statistic: 72.58 on 1 and 473 DF,  p-value: < 2.2e-16

3 Check association of sSVs and batch effects

tmp.long <- ctp_pheno %>% dplyr::select(c("IID", "plate", "race", "sex", "age", "group", "hseq_Glial", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")) %>% melt(id.vars = c("IID", "plate", "race", "sex", "age", "group", "hseq_Glial"), variable.name = "sSV_num", value.name = "sSV")

ggplot(tmp.long, aes(x = plate, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = race, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = sex, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num)

ggplot(tmp.long, aes(x = group, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num)

ggplot(tmp.long, aes(x = age, y = sSV, colour = plate)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ sSV_num)
## `geom_smooth()` using formula 'y ~ x'

ggplot(tmp.long, aes(x = hseq_Glial, y = sSV, colour = plate)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ sSV_num)
## `geom_smooth()` using formula 'y ~ x'

4 Check association of VAE embeddings and batch effects

tmp.long <- ctp_pheno %>% dplyr::select(c("IID", "plate", "race", "sex", "age", "group", "hseq_Glial", "VAEe0", "VAEe1", "VAEe2", "VAEe3", "VAEe4", "VAEe5", "VAEe6", "VAEe7", "VAEe8", "VAEe9")) %>% melt(id.vars = c("IID", "plate", "race", "sex", "age", "group", "hseq_Glial"), variable.name = "VAEe", value.name = "embedding")

ggplot(tmp.long, aes(x = plate, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = race, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = sex, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = group, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = age, y = embedding, colour = plate)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ VAEe)
## `geom_smooth()` using formula 'y ~ x'

ggplot(tmp.long, aes(x = hseq_Glial, y = embedding, colour = plate)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ VAEe)
## `geom_smooth()` using formula 'y ~ x'

5 Boxplots - raw

Add analysis info on

houseman_seq.long <- melt(houseman_seq, variable.name = "celltype", value.name = "seq_houseman_CTP")
## Using IID as id variables
houseman_seq.gg <- ggplot(houseman_seq.long, aes(x = celltype, y = seq_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

methylcc_ctp.long <- melt(methylcc_ctp, variable.name = "celltype", value.name = "methylcc_CTP")
## Using IID as id variables
methylcc_ctp.gg <- ggplot(methylcc_ctp.long, aes(x = celltype, y = methylcc_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))
#+ theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1))

coefs_sub.long <- melt(coefs_sub, variable.name = "celltype", value.name = "eBayes_houseman_CTP") %>% mutate(MatchCell = match(celltype, c("h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")))
## Using IID as id variables
coefs_sub.gg <- coefs_sub.long %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>% ggplot(aes(x = celltype, y = eBayes_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

coefs_brain.long <- melt(coefs_brain, variable.name = "celltype", value.name = "eBayes_houseman_CTP")
## Using IID as id variables
coefs_brain.gg <- ggplot(coefs_brain.long, aes(x = celltype, y = eBayes_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

smartsva.long <- melt(smartsva, variable.name = "celltype", value.name = "smartsva_CTP")
## Using IID, IID_short as id variables
smartsva.gg <- ggplot(smartsva.long, aes(x = celltype, y = smartsva_CTP, colour = celltype)) + geom_boxplot()  + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

jaffe.long <- melt(jaffe, variable.name = "celltype", value.name = "Jaffe_CTP")
## Using IID, brnum as id variables
jaffe.long$celltype <- as.character(jaffe.long$celltype)
jaffe.long$celltype[which(jaffe.long$celltype == "comp_da_neuron")] <- "da_neuron"
jaffe.long$celltype[which(jaffe.long$celltype == "comp_es")] <- "es"
jaffe.long$celltype[which(jaffe.long$celltype == "comp_neun_neg")] <- "neun_neg"
jaffe.long$celltype[which(jaffe.long$celltype == "comp_neun_pos")] <- "neun_pos"
jaffe.long$celltype[which(jaffe.long$celltype == "comp_npc")] <- "npc"
jaffe.gg <- ggplot(jaffe.long, aes(x = celltype, y = Jaffe_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

celfie.long <- melt(celfie, id.vars = c("IID", "IID_short"), variable.name = "celltype", value.name = "CelFIE_CTP")
celfie.gg <- ggplot(celfie.long, aes(x = celltype, y = CelFIE_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

vae.long <- melt(vae, id.vars = c("IID"), variable.name = "celltype", value.name = "VAE_embed")
vae.gg <- ggplot(vae.long, aes(x = celltype, y = VAE_embed, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

# Arrange into 1 plot
#ggarrange(houseman_seq.gg, methylcc_ctp.gg, coefs_sub.gg, coefs_brain.gg, jaffe.gg, celfie.gg, smartsva.gg, nrow = 3, ncol = 2)

ggarrange(houseman_seq.gg, methylcc_ctp.gg, coefs_sub.gg, coefs_brain.gg, celfie.gg, smartsva.gg, nrow = 3, ncol = 2)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

5.1 houseman_seq

# - boxplot
hseq_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), all_of(ctp_cols)))

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.tmp, paste(out_dir, "/hseq_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
hseq_ctp_pheno.long <- melt(hseq_ctp_pheno.tmp, measure.vars = c(all_of(ctp_cols)), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno.long$Level <- ifelse(hseq_ctp_pheno.long$celltype %in% c("hseq_Neuron", "hseq_Glial"), "Level1", "Level2")
hseq_ctp_pheno.long$Region <- "PFC"
hseq_ctp_pheno.long$celltype <- gsub("hseq_", "", hseq_ctp_pheno.long$celltype)
hseq_ctp_pheno.long$celltype <- unlist(lapply(strsplit(hseq_ctp_pheno.long$celltype, "_"), function(x) x[1]))
hseq_ctp_pheno.long$MatchCell <- match(hseq_ctp_pheno.long$celltype, rev(c("Exc", "Inh", "Astro", "Micro", "Endo", "Oligo", "OPC", "Neuron", "Glial")))
hseq_ctp_pheno.long %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
  ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.1.1 Adjust for oligodendrocyte proportion

# New adjustment which only changes neuronal populations
hseq_ctp_pheno_adjoligo.tmp <- hseq_ctp_pheno.tmp %>%
  mutate(sum_neuro_oligo = hseq_Exc + hseq_Inh + hseq_Oligo) %>%
  mutate(Exc_Inh_ratio = (hseq_Exc/(hseq_Exc + hseq_Inh))) %>%
  mutate(Inh_Exc_ratio = (hseq_Inh/(hseq_Exc + hseq_Inh))) %>%
  mutate(hseq_Exc = Exc_Inh_ratio * sum_neuro_oligo) %>%
  mutate(hseq_Inh = Inh_Exc_ratio * sum_neuro_oligo) %>%
  dplyr::select(-c("hseq_Oligo", "hseq_Neuron", "hseq_Glial"))

# Old adjustment which changed ALL CTPs
# methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo <- methylcc_ctp_pheno_adjoligo.tmp$Exc + methylcc_ctp_pheno_adjoligo.tmp $Inh + methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R + methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP
# methylcc_ctp_pheno_adjoligo.tmp$Exc <- methylcc_ctp_pheno_adjoligo.tmp$Exc/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$Inh <- methylcc_ctp_pheno_adjoligo.tmp$Inh/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo

hseq_ctp_pheno_adjoligo.tmp$hseq_Neuron <- rowSums(hseq_ctp_pheno_adjoligo.tmp[,neuron_n])
hseq_ctp_pheno_adjoligo.tmp$hseq_Glial <- rowSums(hseq_ctp_pheno_adjoligo.tmp[,glial_rmoligo_n])

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno_adjoligo.tmp, paste(out_dir, "/hseq_adjoligo_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
hseq_ctp_pheno_adjoligo.long <- melt(hseq_ctp_pheno_adjoligo.tmp, measure.vars = c(all_of(neuron_n), all_of(glial_rmoligo_n), "hseq_Neuron", "hseq_Glial"), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno_adjoligo.long$Level <- ifelse(hseq_ctp_pheno_adjoligo.long$celltype %in% c("hseq_Neuron", "hseq_Glial"), "Level1", "Level2")
hseq_ctp_pheno_adjoligo.long$Region <- "PFC"
hseq_ctp_pheno_adjoligo.long$celltype <- gsub("hseq_", "", hseq_ctp_pheno_adjoligo.long$celltype)

hseq_ctp_pheno_adjoligo.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "OPC"))))) %>%
  ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.2 methylCC

ctp_cols_mcc <- gsub("hseq", "mcc", ctp_cols)
neuron_n_mcc <- gsub("hseq", "mcc", neuron_n)
glial_rmoligo_n_mcc <- gsub("hseq", "mcc", glial_rmoligo_n)

# - boxplot
methylcc_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), all_of(ctp_cols_mcc)))

if (write_out == TRUE) {
  write.table(methylcc_ctp_pheno.tmp, paste(out_dir, "/methylcc_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
methylcc_ctp_pheno.long <- melt(methylcc_ctp_pheno.tmp, measure.vars = c(all_of(ctp_cols_mcc)), variable.name = "celltype", value.name = "CTP")
methylcc_ctp_pheno.long$Level <- ifelse(methylcc_ctp_pheno.long$celltype %in% c("mcc_Neuron", "mcc_Glial"), "Level1", "Level2")
methylcc_ctp_pheno.long$Region <- "PFC"
methylcc_ctp_pheno.long$celltype <- gsub("mcc_", "", methylcc_ctp_pheno.long$celltype)

methylcc_ctp_pheno.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "Oligo", "OPC"))))) %>%
ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.2.1 Adjust for oligodendrocyte proportion

# New adjustment which only changes neuronal populations
methylcc_ctp_pheno_adjoligo.tmp <- methylcc_ctp_pheno.tmp %>%
  mutate(sum_neuro_oligo = mcc_Exc + mcc_Inh + mcc_Oligo) %>%
  mutate(Exc_Inh_ratio = (mcc_Exc/(mcc_Exc +mcc_Inh))) %>%
  mutate(Inh_Exc_ratio = (mcc_Inh/(mcc_Exc + mcc_Inh))) %>%
  mutate(mcc_Exc = Exc_Inh_ratio * sum_neuro_oligo) %>%
  mutate(mcc_Inh = Inh_Exc_ratio * sum_neuro_oligo) %>%
  dplyr::select(-c("mcc_Oligo", "mcc_Neuron", "mcc_Glial"))

# Old adjustment which changed ALL CTPs
# methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo <- methylcc_ctp_pheno_adjoligo.tmp$Exc + methylcc_ctp_pheno_adjoligo.tmp $Inh + methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R + methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP
# methylcc_ctp_pheno_adjoligo.tmp$Exc <- methylcc_ctp_pheno_adjoligo.tmp$Exc/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$Inh <- methylcc_ctp_pheno_adjoligo.tmp$Inh/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo

methylcc_ctp_pheno_adjoligo.tmp$mcc_Neuron <- rowSums(methylcc_ctp_pheno_adjoligo.tmp[,neuron_n_mcc])
methylcc_ctp_pheno_adjoligo.tmp$mcc_Glial <- rowSums(methylcc_ctp_pheno_adjoligo.tmp[,glial_rmoligo_n_mcc])

if (write_out == TRUE) {
  write.table(methylcc_ctp_pheno_adjoligo.tmp, paste(out_dir, "/methylcc_adjoligo_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
methylcc_ctp_pheno_adjoligo.long <- melt(methylcc_ctp_pheno_adjoligo.tmp, measure.vars = c(all_of(neuron_n_mcc), all_of(glial_rmoligo_n_mcc), "mcc_Neuron", "mcc_Glial"), variable.name = "celltype", value.name = "CTP")
methylcc_ctp_pheno_adjoligo.long$Level <- ifelse(methylcc_ctp_pheno_adjoligo.long$celltype %in% c("mcc_Neuron", "mcc_Glial"), "Level1", "Level2")
methylcc_ctp_pheno_adjoligo.long$Region <- "PFC"
methylcc_ctp_pheno_adjoligo.long$celltype <- gsub("mcc_", "", methylcc_ctp_pheno_adjoligo.long$celltype)

methylcc_ctp_pheno_adjoligo.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "OPC"))))) %>%
ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.3 coefs_sub

# - boxplot
coefs_sub_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), "h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OPC", "h_OLIGO"))
coefs_sub_ctp_pheno.tmp$neuron_sum <- coefs_sub_ctp_pheno.tmp$h_GABA + coefs_sub_ctp_pheno.tmp$h_GLU
coefs_sub_ctp_pheno.tmp$glia_sum <- coefs_sub_ctp_pheno.tmp$h_ASTRO + coefs_sub_ctp_pheno.tmp$h_OPC + coefs_sub_ctp_pheno.tmp$h_OLIGO + coefs_sub_ctp_pheno.tmp$h_ENDO + coefs_sub_ctp_pheno.tmp$h_MONO

if (write_out == TRUE) {
  write.table(coefs_sub_ctp_pheno.tmp, paste(out_dir, "/coefs_sub_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
coefs_sub_ctp_pheno.long <- melt(coefs_sub_ctp_pheno.tmp, measure.vars = c("h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OPC", "h_OLIGO", "neuron_sum", "glia_sum"), variable.name = "celltype", value.name = "CTP")
coefs_sub_ctp_pheno.long$Level <- ifelse(coefs_sub_ctp_pheno.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
coefs_sub_ctp_pheno.long$Region <- "PFC"

ggplot(coefs_sub_ctp_pheno.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6 Boxplots: clr-transformation (2 ways to handle zeros)

  • Justification: does it make sense to run an ANOVA on CTP ~ group, if the CTPs are related?
  • Eg. what does variance analysis look like when the y-variable for each individual is non-independent?

6.1 houseman_seq

6.1.1 clr-transformation with z-compositions to deal with zeros

  • IDs go as columns, CTPs as rows for zCompositions cmultRepl(), and IDs as rows for the clr(?
keep_pheno_col <- hseq_ctp_pheno.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c(all_of(level2_cols))))

# Apply zCompositions to empirically get the best replacement of zeros
# - it looks like you have to transpose the matrix (with samples as columns) to do this properly (?)
length(which(hseq_ctp_pheno.tmp.clr == 0)) # check there are no 0s
## [1] 0
check <- which(hseq_ctp_pheno.tmp.clr == 0, arr.ind = T)

hseq_ctp_pheno.tmp.clr_t <- t(hseq_ctp_pheno.tmp.clr)

if (length(which(hseq_ctp_pheno.tmp.clr == 0)) > 0) {
  
  check <- which(hseq_ctp_pheno.tmp.clr_t == 0, arr.ind = T)
  hseq_ctp_pheno.tmp.clr0_t <- cmultRepl(hseq_ctp_pheno.tmp.clr_t, output = "p-counts")
  hseq_ctp_pheno.tmp.clr0_t[check]

} else {
  
  print("no zcompositions applied as no 0 values")
  hseq_ctp_pheno.tmp.clr0_t <- hseq_ctp_pheno.tmp.clr_t
  
}
## [1] "no zcompositions applied as no 0 values"
# Perform clr-transform
hseq_ctp_pheno.tmp.clr0.tr <- clr(t(hseq_ctp_pheno.tmp.clr_t))
hseq_ctp_pheno.clr.zc <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.clr.zc, paste(out_dir, "/hseq_ctp_pheno.clr.zc.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno.clr.zc.long <- melt(hseq_ctp_pheno.clr.zc, measure.vars = c(all_of(level2_cols)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno.clr.zc.long$Region <- "PFC"
ggplot(hseq_ctp_pheno.clr.zc.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("")

  ggtitle("zCompositions")
## $title
## [1] "zCompositions"
## 
## attr(,"class")
## [1] "labels"
    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6.1.2 clr-transformation with 1e-3 minimum

  • Justification: does it make sense to run an ANOVA on CTP ~ group, if the CTPs are related?

  • Eg. what does variance analysis look like when the y-variable for each individual is non-independent?

  • N.B. IDs go as columns, CTPs as rows

keep_pheno_col <- hseq_ctp_pheno.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c(all_of(level2_cols))))

# Make minimum CTP = 1e-3
length(which(hseq_ctp_pheno.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 0
hseq_ctp_pheno.tmp.clr0 <- hseq_ctp_pheno.tmp.clr
hseq_ctp_pheno.tmp.clr0[which(hseq_ctp_pheno.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
hseq_ctp_pheno.tmp.clr0.tr <- clr(hseq_ctp_pheno.tmp.clr0)
hseq_ctp_pheno.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.clr.1e3, paste(out_dir, "/hseq_ctp_pheno.clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno.clr.1e3.long <- melt(hseq_ctp_pheno.clr.1e3, measure.vars = c(all_of(level2_cols)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno.clr.1e3.long$Region <- "PFC"
ggplot(hseq_ctp_pheno.clr.1e3.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  ggtitle("Minimum value = 1e-3")

    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6.1.2.1 Adjust for oligodendrocyte proportion

keep_pheno_col <- hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno_adjoligo.tmp.clr <- as.matrix(hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(c(all_of(neuron_n), all_of(glial_rmoligo_n))))

# Make minimum CTP = 1e-3
length(which(hseq_ctp_pheno_adjoligo.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 0
hseq_ctp_pheno_adjoligo.tmp.clr0 <- hseq_ctp_pheno_adjoligo.tmp.clr
hseq_ctp_pheno_adjoligo.tmp.clr0[which(hseq_ctp_pheno_adjoligo.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
hseq_ctp_pheno_adjoligo.tmp.clr0.tr <- clr(hseq_ctp_pheno_adjoligo.tmp.clr0)
hseq_ctp_pheno_adjoligo.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno_adjoligo.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno_adjoligo.clr.1e3, paste(out_dir, "/hseq_adjoligo_ctp_pheno.clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno_adjoligo.clr.1e3.long <- melt(hseq_ctp_pheno_adjoligo.clr.1e3, measure.vars = c(all_of(neuron_n), all_of(glial_rmoligo_n)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno_adjoligo.clr.1e3.long$Region <- "PFC"
ggplot(hseq_ctp_pheno_adjoligo.clr.1e3.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  ggtitle("Minimum value = 1e-3")

    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

7 Model: per-cell-type, compare raw

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group, data = hseq_ctp_pheno.tmp))

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp))
  
} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group, data = hseq_ctp_pheno.tmp))

  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.154618 -0.021923  0.005134  0.026752  0.086864 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.572e-01  7.859e-03  20.003   <2e-16 ***
## groupSCZ         6.849e-03  4.545e-03   1.507   0.1325    
## age              9.943e-05  1.071e-04   0.928   0.3537    
## sexM            -2.705e-03  4.115e-03  -0.657   0.5112    
## raceCAUC         4.701e-03  3.835e-03   1.226   0.2208    
## plateLieber_244 -2.572e-03  6.900e-03  -0.373   0.7095    
## plateLieber_289  2.136e-03  7.382e-03   0.289   0.7725    
## plateLieber_30  -8.172e-03  1.098e-02  -0.744   0.4571    
## plateLieber_315  1.624e-02  7.907e-03   2.054   0.0406 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0411 on 466 degrees of freedom
## Multiple R-squared:  0.04361,    Adjusted R-squared:  0.02719 
## F-statistic: 2.656 on 8 and 466 DF,  p-value: 0.007386
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.077194 -0.015526 -0.001389  0.013280  0.097177 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.911e-01  4.723e-03  40.467   <2e-16 ***
## groupSCZ         6.717e-03  2.731e-03   2.459   0.0143 *  
## age             -8.400e-04  6.437e-05 -13.050   <2e-16 ***
## sexM             1.519e-03  2.473e-03   0.614   0.5394    
## raceCAUC        -3.404e-03  2.305e-03  -1.477   0.1403    
## plateLieber_244 -3.213e-03  4.147e-03  -0.775   0.4389    
## plateLieber_289  1.419e-03  4.437e-03   0.320   0.7493    
## plateLieber_30  -1.090e-02  6.599e-03  -1.652   0.0992 .  
## plateLieber_315  8.719e-03  4.752e-03   1.835   0.0672 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0247 on 466 degrees of freedom
## Multiple R-squared:  0.2969, Adjusted R-squared:  0.2849 
## F-statistic:  24.6 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.073797 -0.015338 -0.000129  0.015133  0.091459 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.578e-01  4.817e-03  32.771  < 2e-16 ***
## groupSCZ         7.618e-03  2.785e-03   2.735  0.00648 ** 
## age             -3.735e-04  6.564e-05  -5.689 2.26e-08 ***
## sexM            -3.332e-03  2.522e-03  -1.322  0.18698    
## raceCAUC        -1.531e-04  2.350e-03  -0.065  0.94808    
## plateLieber_244 -2.275e-03  4.228e-03  -0.538  0.59084    
## plateLieber_289  2.881e-03  4.524e-03   0.637  0.52461    
## plateLieber_30  -6.916e-03  6.729e-03  -1.028  0.30458    
## plateLieber_315  2.269e-02  4.846e-03   4.683 3.72e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02519 on 466 degrees of freedom
## Multiple R-squared:  0.1565, Adjusted R-squared:  0.142 
## F-statistic: 10.81 on 8 and 466 DF,  p-value: 5.29e-14
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.015895 -0.003449 -0.000352  0.002778  0.032992 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.0475461  0.0010567  44.996  < 2e-16 ***
## groupSCZ         0.0023649  0.0006111   3.870 0.000124 ***
## age             -0.0001641  0.0000144 -11.395  < 2e-16 ***
## sexM            -0.0011838  0.0005532  -2.140 0.032872 *  
## raceCAUC        -0.0007409  0.0005156  -1.437 0.151399    
## plateLieber_244 -0.0002080  0.0009276  -0.224 0.822709    
## plateLieber_289  0.0023775  0.0009925   2.395 0.016995 *  
## plateLieber_30  -0.0029151  0.0014763  -1.975 0.048904 *  
## plateLieber_315  0.0043664  0.0010631   4.107 4.73e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005526 on 466 degrees of freedom
## Multiple R-squared:  0.2669, Adjusted R-squared:  0.2544 
## F-statistic: 21.21 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.014971 -0.003645 -0.000758  0.003224  0.034847 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      4.894e-02  1.117e-03  43.812   <2e-16 ***
## groupSCZ         2.905e-04  6.459e-04   0.450   0.6531    
## age             -2.981e-04  1.522e-05 -19.581   <2e-16 ***
## sexM             5.569e-04  5.848e-04   0.952   0.3414    
## raceCAUC         9.769e-06  5.450e-04   0.018   0.9857    
## plateLieber_244  2.660e-04  9.806e-04   0.271   0.7863    
## plateLieber_289  6.184e-04  1.049e-03   0.589   0.5558    
## plateLieber_30  -2.855e-04  1.561e-03  -0.183   0.8549    
## plateLieber_315  2.448e-03  1.124e-03   2.179   0.0298 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005842 on 466 degrees of freedom
## Multiple R-squared:  0.508,  Adjusted R-squared:  0.4996 
## F-statistic: 60.15 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Oligo :
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19960 -0.05855 -0.00381  0.04623  0.31752 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.3337813  0.0160512  20.795  < 2e-16 ***
## groupSCZ        -0.0288880  0.0092820  -3.112 0.001971 ** 
## age              0.0016504  0.0002188   7.545  2.4e-13 ***
## sexM             0.0028344  0.0084031   0.337 0.736035    
## raceCAUC        -0.0015795  0.0078316  -0.202 0.840256    
## plateLieber_244  0.0096507  0.0140907   0.685 0.493749    
## plateLieber_289 -0.0197290  0.0150765  -1.309 0.191319    
## plateLieber_30   0.0335220  0.0224256   1.495 0.135640    
## plateLieber_315 -0.0546807  0.0161482  -3.386 0.000769 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08394 on 466 degrees of freedom
## Multiple R-squared:  0.1586, Adjusted R-squared:  0.1441 
## F-statistic: 10.98 on 8 and 466 DF,  p-value: 3.127e-14
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0120785 -0.0027964 -0.0002854  0.0022440  0.0253175 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.353e-02  7.912e-04  17.097  < 2e-16 ***
## groupSCZ         1.570e-03  4.575e-04   3.431 0.000656 ***
## age              7.324e-07  1.078e-05   0.068 0.945882    
## sexM             1.132e-04  4.142e-04   0.273 0.784764    
## raceCAUC        -2.820e-04  3.861e-04  -0.731 0.465445    
## plateLieber_244  7.840e-05  6.946e-04   0.113 0.910177    
## plateLieber_289 -3.808e-04  7.432e-04  -0.512 0.608656    
## plateLieber_30  -5.446e-04  1.105e-03  -0.493 0.622473    
## plateLieber_315  2.803e-03  7.960e-04   3.521 0.000472 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004138 on 466 degrees of freedom
## Multiple R-squared:  0.09025,    Adjusted R-squared:  0.07463 
## F-statistic: 5.778 on 8 and 466 DF,  p-value: 4.874e-07

7.1 Test aggregated sums

summary(lm(hseq_Glial ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = hseq_Glial ~ group, data = ctp_pheno)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.132700 -0.043370 -0.007019  0.033785  0.240464 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.625528   0.003815 163.962   <2e-16 ***
## groupSCZ    -0.003258   0.006097  -0.534    0.593    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06486 on 473 degrees of freedom
## Multiple R-squared:  0.0006033,  Adjusted R-squared:  -0.00151 
## F-statistic: 0.2855 on 1 and 473 DF,  p-value: 0.5934
summary(lm(mcc_Glial ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = mcc_Glial ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39126 -0.09027 -0.00443  0.08503  0.42309 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.523652   0.007842   66.77   <2e-16 ***
## groupSCZ    0.003138   0.012533    0.25    0.802    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1333 on 473 degrees of freedom
## Multiple R-squared:  0.0001325,  Adjusted R-squared:  -0.001981 
## F-statistic: 0.0627 on 1 and 473 DF,  p-value: 0.8024
summary(lm(h_NeuN_neg ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = h_NeuN_neg ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21538 -0.06214 -0.01045  0.05343  0.36667 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.661014   0.005644 117.128   <2e-16 ***
## groupSCZ    0.012362   0.009019   1.371    0.171    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09594 on 473 degrees of freedom
## Multiple R-squared:  0.003956,   Adjusted R-squared:  0.001851 
## F-statistic: 1.879 on 1 and 473 DF,  p-value: 0.1711
summary(lm(hseq_Glial ~ group + age + sex + race + plate, data = ctp_pheno))
## 
## Call:
## lm(formula = hseq_Glial ~ group + age + sex + race + plate, data = ctp_pheno)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.129301 -0.042520 -0.007637  0.031444  0.241467 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.6016360  0.0119912  50.173  < 2e-16 ***
## groupSCZ        -0.0170454  0.0069342  -2.458   0.0143 *  
## age              0.0008155  0.0001634   4.990 8.53e-07 ***
## sexM            -0.0010116  0.0062776  -0.161   0.8721    
## raceCAUC        -0.0027457  0.0058507  -0.469   0.6391    
## plateLieber_244  0.0075124  0.0105266   0.714   0.4758    
## plateLieber_289 -0.0142332  0.0112631  -1.264   0.2070    
## plateLieber_30   0.0228604  0.0167532   1.365   0.1731    
## plateLieber_315 -0.0223713  0.0120636  -1.854   0.0643 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06271 on 466 degrees of freedom
## Multiple R-squared:  0.07945,    Adjusted R-squared:  0.06364 
## F-statistic: 5.027 on 8 and 466 DF,  p-value: 5.305e-06
summary(lm(mcc_Glial ~ group + age + sex + race + plate, data = ctp_pheno))
## 
## Call:
## lm(formula = mcc_Glial ~ group + age + sex + race + plate, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33330 -0.08334 -0.00243  0.06840  0.41129 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.4546536  0.0249090  18.253  < 2e-16 ***
## groupSCZ        -0.0235421  0.0144042  -1.634    0.103    
## age              0.0015676  0.0003395   4.618 5.02e-06 ***
## sexM             0.0085967  0.0130402   0.659    0.510    
## raceCAUC         0.0130649  0.0121534   1.075    0.283    
## plateLieber_244  0.0001692  0.0218666   0.008    0.994    
## plateLieber_289 -0.0194010  0.0233964  -0.829    0.407    
## plateLieber_30   0.0315167  0.0348010   0.906    0.366    
## plateLieber_315  0.0355474  0.0250595   1.419    0.157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1303 on 466 degrees of freedom
## Multiple R-squared:  0.05954,    Adjusted R-squared:  0.04339 
## F-statistic: 3.688 on 8 and 466 DF,  p-value: 0.0003455
summary(lm(h_NeuN_neg ~ group + age + sex + race + plate, data = ctp_pheno))
## 
## Call:
## lm(formula = h_NeuN_neg ~ group + age + sex + race + plate, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19348 -0.06171 -0.01070  0.04933  0.33286 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.6207388  0.0176098  35.250  < 2e-16 ***
## groupSCZ        -0.0013403  0.0101833  -0.132  0.89534    
## age              0.0006218  0.0002400   2.591  0.00988 ** 
## sexM             0.0067481  0.0092190   0.732  0.46455    
## raceCAUC        -0.0153755  0.0085921  -1.789  0.07418 .  
## plateLieber_244  0.0082637  0.0154590   0.535  0.59321    
## plateLieber_289  0.0254361  0.0165405   1.538  0.12478    
## plateLieber_30   0.0302595  0.0246032   1.230  0.21935    
## plateLieber_315  0.0783019  0.0177162   4.420 1.23e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0921 on 466 degrees of freedom
## Multiple R-squared:  0.09577,    Adjusted R-squared:  0.08024 
## F-statistic: 6.169 on 8 and 466 DF,  p-value: 1.398e-07

7.2 Adjust for oligodendrocyte proportion

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group, data = hseq_ctp_pheno_adjoligo.tmp))

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group, data = methylcc_ctp_pheno_adjoligo.tmp))

  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group + age + sex + race + plate, data = methylcc_ctp_pheno_adjoligo.tmp))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.241529 -0.015248  0.009359  0.027839  0.094106 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.3028591  0.0096106  31.513   <2e-16 ***
## groupSCZ        -0.0062043  0.0055576  -1.116   0.2648    
## age              0.0013430  0.0001310  10.254   <2e-16 ***
## sexM            -0.0005484  0.0050313  -0.109   0.9132    
## raceCAUC         0.0092590  0.0046891   1.975   0.0489 *  
## plateLieber_244 -0.0014283  0.0084368  -0.169   0.8656    
## plateLieber_289 -0.0072352  0.0090270  -0.802   0.4232    
## plateLieber_30   0.0051445  0.0134272   0.383   0.7018    
## plateLieber_315 -0.0033809  0.0096687  -0.350   0.7267    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05026 on 466 degrees of freedom
## Multiple R-squared:  0.2055, Adjusted R-squared:  0.1918 
## F-statistic: 15.06 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.09080 -0.03532 -0.00948  0.01856  0.32829 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.3792761  0.0113596  33.388  < 2e-16 ***
## groupSCZ        -0.0091174  0.0065690  -1.388  0.16582    
## age             -0.0004332  0.0001548  -2.798  0.00535 ** 
## sexM             0.0021962  0.0059469   0.369  0.71207    
## raceCAUC        -0.0095411  0.0055425  -1.721  0.08583 .  
## plateLieber_244  0.0052945  0.0099722   0.531  0.59572    
## plateLieber_289 -0.0089397  0.0106698  -0.838  0.40255    
## plateLieber_30   0.0093033  0.0158708   0.586  0.55803    
## plateLieber_315 -0.0263409  0.0114283  -2.305  0.02161 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05941 on 466 degrees of freedom
## Multiple R-squared:  0.08194,    Adjusted R-squared:  0.06618 
## F-statistic: 5.199 on 8 and 466 DF,  p-value: 3.079e-06
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.073797 -0.015338 -0.000129  0.015133  0.091459 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.578e-01  4.817e-03  32.771  < 2e-16 ***
## groupSCZ         7.618e-03  2.785e-03   2.735  0.00648 ** 
## age             -3.735e-04  6.564e-05  -5.689 2.26e-08 ***
## sexM            -3.332e-03  2.522e-03  -1.322  0.18698    
## raceCAUC        -1.531e-04  2.350e-03  -0.065  0.94808    
## plateLieber_244 -2.275e-03  4.228e-03  -0.538  0.59084    
## plateLieber_289  2.881e-03  4.524e-03   0.637  0.52461    
## plateLieber_30  -6.916e-03  6.729e-03  -1.028  0.30458    
## plateLieber_315  2.269e-02  4.846e-03   4.683 3.72e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02519 on 466 degrees of freedom
## Multiple R-squared:  0.1565, Adjusted R-squared:  0.142 
## F-statistic: 10.81 on 8 and 466 DF,  p-value: 5.29e-14
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.015895 -0.003449 -0.000352  0.002778  0.032992 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.0475461  0.0010567  44.996  < 2e-16 ***
## groupSCZ         0.0023649  0.0006111   3.870 0.000124 ***
## age             -0.0001641  0.0000144 -11.395  < 2e-16 ***
## sexM            -0.0011838  0.0005532  -2.140 0.032872 *  
## raceCAUC        -0.0007409  0.0005156  -1.437 0.151399    
## plateLieber_244 -0.0002080  0.0009276  -0.224 0.822709    
## plateLieber_289  0.0023775  0.0009925   2.395 0.016995 *  
## plateLieber_30  -0.0029151  0.0014763  -1.975 0.048904 *  
## plateLieber_315  0.0043664  0.0010631   4.107 4.73e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005526 on 466 degrees of freedom
## Multiple R-squared:  0.2669, Adjusted R-squared:  0.2544 
## F-statistic: 21.21 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.014971 -0.003645 -0.000758  0.003224  0.034847 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      4.894e-02  1.117e-03  43.812   <2e-16 ***
## groupSCZ         2.905e-04  6.459e-04   0.450   0.6531    
## age             -2.981e-04  1.522e-05 -19.581   <2e-16 ***
## sexM             5.569e-04  5.848e-04   0.952   0.3414    
## raceCAUC         9.769e-06  5.450e-04   0.018   0.9857    
## plateLieber_244  2.660e-04  9.806e-04   0.271   0.7863    
## plateLieber_289  6.184e-04  1.049e-03   0.589   0.5558    
## plateLieber_30  -2.855e-04  1.561e-03  -0.183   0.8549    
## plateLieber_315  2.448e-03  1.124e-03   2.179   0.0298 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005842 on 466 degrees of freedom
## Multiple R-squared:  0.508,  Adjusted R-squared:  0.4996 
## F-statistic: 60.15 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0120785 -0.0027964 -0.0002854  0.0022440  0.0253175 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.353e-02  7.912e-04  17.097  < 2e-16 ***
## groupSCZ         1.570e-03  4.575e-04   3.431 0.000656 ***
## age              7.324e-07  1.078e-05   0.068 0.945882    
## sexM             1.132e-04  4.142e-04   0.273 0.784764    
## raceCAUC        -2.820e-04  3.861e-04  -0.731 0.465445    
## plateLieber_244  7.840e-05  6.946e-04   0.113 0.910177    
## plateLieber_289 -3.808e-04  7.432e-04  -0.512 0.608656    
## plateLieber_30  -5.446e-04  1.105e-03  -0.493 0.622473    
## plateLieber_315  2.803e-03  7.960e-04   3.521 0.000472 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004138 on 466 degrees of freedom
## Multiple R-squared:  0.09025,    Adjusted R-squared:  0.07463 
## F-statistic: 5.778 on 8 and 466 DF,  p-value: 4.874e-07

8 Model: per-cell-type, compare clr-transformed

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group, data = hseq_ctp_pheno.clr.1e3))

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3))

} else if (cellnum == 9) {

  summary(lm(cbind(Exc_upper, Exc_inner, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group, data = hseq_ctp_pheno.clr.1e3))

  summary(lm(cbind(Exc_upper, Exc_inner, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3))
  
}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86397 -0.09579  0.06049  0.19332  0.63794 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.5080197  0.0659040   7.708 7.75e-14 ***
## groupSCZ         0.0152478  0.0381106   0.400  0.68927    
## age              0.0026543  0.0008982   2.955  0.00328 ** 
## sexM            -0.0011447  0.0345018  -0.033  0.97355    
## raceCAUC         0.0482415  0.0321554   1.500  0.13422    
## plateLieber_244 -0.0280075  0.0578546  -0.484  0.62854    
## plateLieber_289  0.0081392  0.0619022   0.131  0.89545    
## plateLieber_30  -0.0509499  0.0920763  -0.553  0.58029    
## plateLieber_315  0.0403896  0.0663022   0.609  0.54271    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3447 on 466 degrees of freedom
## Multiple R-squared:  0.03853,    Adjusted R-squared:  0.02202 
## F-statistic: 2.334 on 8 and 466 DF,  p-value: 0.01825
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27061 -0.06846 -0.00182  0.06219  0.40063 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.7516701  0.0199471  37.683   <2e-16 ***
## groupSCZ         0.0093056  0.0115349   0.807   0.4202    
## age             -0.0028461  0.0002718 -10.470   <2e-16 ***
## sexM             0.0112706  0.0104426   1.079   0.2810    
## raceCAUC        -0.0162136  0.0097324  -1.666   0.0964 .  
## plateLieber_244 -0.0116014  0.0175108  -0.663   0.5080    
## plateLieber_289  0.0024925  0.0187358   0.133   0.8942    
## plateLieber_30  -0.0387518  0.0278686  -1.391   0.1650    
## plateLieber_315 -0.0236333  0.0200676  -1.178   0.2395    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1043 on 466 degrees of freedom
## Multiple R-squared:  0.2252, Adjusted R-squared:  0.2119 
## F-statistic: 16.93 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50958 -0.09160  0.00885  0.09068  0.51723 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.5726063  0.0292106  19.603   <2e-16 ***
## groupSCZ         0.0217338  0.0168917   1.287   0.1989    
## age             -0.0007277  0.0003981  -1.828   0.0682 .  
## sexM            -0.0183629  0.0152922  -1.201   0.2304    
## raceCAUC         0.0006700  0.0142522   0.047   0.9625    
## plateLieber_244 -0.0175762  0.0256428  -0.685   0.4934    
## plateLieber_289  0.0137425  0.0274369   0.501   0.6167    
## plateLieber_30  -0.0263020  0.0408109  -0.644   0.5196    
## plateLieber_315  0.0706823  0.0293871   2.405   0.0166 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1528 on 466 degrees of freedom
## Multiple R-squared:  0.04719,    Adjusted R-squared:  0.03083 
## F-statistic: 2.885 on 8 and 466 DF,  p-value: 0.003816
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23890 -0.05307 -0.00888  0.04262  0.44593 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.6179512  0.0156704 -39.434  < 2e-16 ***
## groupSCZ         0.0196447  0.0090618   2.168 0.030674 *  
## age             -0.0020310  0.0002136  -9.510  < 2e-16 ***
## sexM            -0.0271763  0.0082037  -3.313 0.000996 ***
## raceCAUC        -0.0161847  0.0076458  -2.117 0.034805 *  
## plateLieber_244  0.0011504  0.0137564   0.084 0.933387    
## plateLieber_289  0.0518700  0.0147188   3.524 0.000467 ***
## plateLieber_30  -0.0393855  0.0218935  -1.799 0.072672 .  
## plateLieber_315  0.0231154  0.0157651   1.466 0.143257    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08195 on 466 degrees of freedom
## Multiple R-squared:  0.207,  Adjusted R-squared:  0.1934 
## F-statistic: 15.21 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41347 -0.07946 -0.01390  0.06343  0.59849 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.5967330  0.0261344 -22.833   <2e-16 ***
## groupSCZ        -0.0280080  0.0151128  -1.853   0.0645 .  
## age             -0.0056723  0.0003562 -15.926   <2e-16 ***
## sexM             0.0152059  0.0136817   1.111   0.2670    
## raceCAUC         0.0056366  0.0127513   0.442   0.6587    
## plateLieber_244  0.0224005  0.0229423   0.976   0.3294    
## plateLieber_289  0.0188220  0.0245474   0.767   0.4436    
## plateLieber_30   0.0302221  0.0365130   0.828   0.4083    
## plateLieber_315  0.0014145  0.0262923   0.054   0.9571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1367 on 466 degrees of freedom
## Multiple R-squared:  0.437,  Adjusted R-squared:  0.4273 
## F-statistic: 45.21 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Oligo :
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77492 -0.18186  0.00651  0.15366  1.08763 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.2818214  0.0523952  24.464  < 2e-16 ***
## groupSCZ        -0.1075871  0.0302988  -3.551 0.000423 ***
## age              0.0067483  0.0007141   9.451  < 2e-16 ***
## sexM             0.0174922  0.0274297   0.638 0.523976    
## raceCAUC        -0.0004271  0.0255643  -0.017 0.986677    
## plateLieber_244  0.0320405  0.0459957   0.697 0.486401    
## plateLieber_289 -0.0550819  0.0492137  -1.119 0.263615    
## plateLieber_30   0.1197180  0.0732028   1.635 0.102634    
## plateLieber_315 -0.2234117  0.0527118  -4.238 2.72e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.274 on 466 degrees of freedom
## Multiple R-squared:  0.2238, Adjusted R-squared:  0.2104 
## F-statistic: 16.79 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + race + plate, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.34377 -0.14034  0.00223  0.14857  0.87070 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.8994333  0.0510210 -37.228  < 2e-16 ***
## groupSCZ         0.0696632  0.0295041   2.361  0.01863 *  
## age              0.0018744  0.0006953   2.696  0.00728 ** 
## sexM             0.0027153  0.0267103   0.102  0.91907    
## raceCAUC        -0.0217226  0.0248938  -0.873  0.38333    
## plateLieber_244  0.0015937  0.0447894   0.036  0.97163    
## plateLieber_289 -0.0399845  0.0479229  -0.834  0.40451    
## plateLieber_30   0.0054491  0.0712829   0.076  0.93910    
## plateLieber_315  0.1114432  0.0513293   2.171  0.03042 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2668 on 466 degrees of freedom
## Multiple R-squared:  0.07323,    Adjusted R-squared:  0.05732 
## F-statistic: 4.603 on 8 and 466 DF,  p-value: 2.021e-05

8.1 Adjust for oligodendrocyte proportion

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group, data = hseq_ctp_pheno_adjoligo.clr.1e3))

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group, data = methylcc_ctp_pheno_adjoligo.clr.1e3))

  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group + age + sex + race + plate, data = methylcc_ctp_pheno_adjoligo.clr.1e3))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13851 -0.09089  0.03346  0.11879  0.56504 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.1912489  0.0430605  27.665   <2e-16 ***
## groupSCZ        -0.0486242  0.0249008  -1.953   0.0515 .  
## age              0.0060950  0.0005868  10.386   <2e-16 ***
## sexM             0.0007670  0.0225429   0.034   0.9729    
## raceCAUC         0.0424929  0.0210098   2.023   0.0437 *  
## plateLieber_244 -0.0078333  0.0378012  -0.207   0.8359    
## plateLieber_289 -0.0265788  0.0404458  -0.657   0.5114    
## plateLieber_30   0.0239837  0.0601610   0.399   0.6903    
## plateLieber_315 -0.0836577  0.0433207  -1.931   0.0541 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2252 on 466 degrees of freedom
## Multiple R-squared:  0.2108, Adjusted R-squared:  0.1972 
## F-statistic: 15.56 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34507 -0.09064 -0.01793  0.06232  0.80596 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.4348992  0.0290126  49.458  < 2e-16 ***
## groupSCZ        -0.0545664  0.0167772  -3.252  0.00123 ** 
## age              0.0005945  0.0003954   1.504  0.13335    
## sexM             0.0131823  0.0151886   0.868  0.38589    
## raceCAUC        -0.0219622  0.0141556  -1.551  0.12147    
## plateLieber_244  0.0085728  0.0254690   0.337  0.73657    
## plateLieber_289 -0.0322255  0.0272509  -1.183  0.23759    
## plateLieber_30   0.0361817  0.0405343   0.893  0.37252    
## plateLieber_315 -0.1476805  0.0291879  -5.060 6.05e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1517 on 466 degrees of freedom
## Multiple R-squared:  0.1443, Adjusted R-squared:  0.1296 
## F-statistic: 9.823 on 8 and 466 DF,  p-value: 1.195e-12
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64840 -0.10179  0.02165  0.11183  0.57189 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.5514471  0.0351508  15.688   <2e-16 ***
## groupSCZ         0.0267731  0.0203268   1.317   0.1884    
## age             -0.0007609  0.0004791  -1.588   0.1129    
## sexM            -0.0149457  0.0184020  -0.812   0.4171    
## raceCAUC         0.0034375  0.0171506   0.200   0.8412    
## plateLieber_244 -0.0196532  0.0308576  -0.637   0.5245    
## plateLieber_289  0.0173311  0.0330164   0.525   0.5999    
## plateLieber_30  -0.0338393  0.0491102  -0.689   0.4911    
## plateLieber_315  0.0768530  0.0353632   2.173   0.0303 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1838 on 466 degrees of freedom
## Multiple R-squared:  0.03931,    Adjusted R-squared:  0.02281 
## F-statistic: 2.383 on 8 and 466 DF,  p-value: 0.01593
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22228 -0.05780 -0.00727  0.05457  0.34315 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.6391104  0.0169220 -37.768  < 2e-16 ***
## groupSCZ         0.0246840  0.0097855   2.522 0.011985 *  
## age             -0.0020642  0.0002306  -8.951  < 2e-16 ***
## sexM            -0.0237591  0.0088589  -2.682 0.007580 ** 
## raceCAUC        -0.0134172  0.0082565  -1.625 0.104827    
## plateLieber_244 -0.0009266  0.0148552  -0.062 0.950293    
## plateLieber_289  0.0554586  0.0158944   3.489 0.000531 ***
## plateLieber_30  -0.0469228  0.0236422  -1.985 0.047763 *  
## plateLieber_315  0.0292861  0.0170242   1.720 0.086049 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0885 on 466 degrees of freedom
## Multiple R-squared:  0.191,  Adjusted R-squared:  0.1771 
## F-statistic: 13.75 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35838 -0.06033 -0.00699  0.05777  0.37375 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.6178923  0.0205669 -30.043   <2e-16 ***
## groupSCZ        -0.0229688  0.0118933  -1.931   0.0541 .  
## age             -0.0057055  0.0002803 -20.355   <2e-16 ***
## sexM             0.0186231  0.0107671   1.730   0.0844 .  
## raceCAUC         0.0084041  0.0100348   0.837   0.4027    
## plateLieber_244  0.0203235  0.0180548   1.126   0.2609    
## plateLieber_289  0.0224106  0.0193180   1.160   0.2466    
## plateLieber_30   0.0226848  0.0287345   0.789   0.4302    
## plateLieber_315  0.0075852  0.0206911   0.367   0.7141    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1076 on 466 degrees of freedom
## Multiple R-squared:  0.5538, Adjusted R-squared:  0.5462 
## F-statistic: 72.31 on 8 and 466 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + race + plate, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26886 -0.12731  0.01318  0.13451  0.67980 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.9205925  0.0454761 -42.233  < 2e-16 ***
## groupSCZ         0.0747024  0.0262976   2.841  0.00470 ** 
## age              0.0018412  0.0006198   2.971  0.00312 ** 
## sexM             0.0061325  0.0238074   0.258  0.79684    
## raceCAUC        -0.0189550  0.0221884  -0.854  0.39339    
## plateLieber_244 -0.0004833  0.0399217  -0.012  0.99035    
## plateLieber_289 -0.0363959  0.0427147  -0.852  0.39461    
## plateLieber_30  -0.0020882  0.0635359  -0.033  0.97380    
## plateLieber_315  0.1176139  0.0457509   2.571  0.01046 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2378 on 466 degrees of freedom
## Multiple R-squared:  0.09443,    Adjusted R-squared:  0.07889 
## F-statistic: 6.074 on 8 and 466 DF,  p-value: 1.894e-07

8.2 Check per-batch effect for Endo, Oligo, OPC

#if (cellnum == 7) {
  hseq_ctp_pheno.clr.1e3$age2 <- hseq_ctp_pheno.clr.1e3$age^2
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_289") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_289") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19742 -0.06561 -0.00325  0.06120  0.27753 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.014e-01  6.634e-02  -7.558 1.68e-11 ***
## groupSCZ    -2.285e-02  1.838e-02  -1.243   0.2167    
## age         -5.084e-03  2.410e-03  -2.110   0.0373 *  
## age2         3.587e-05  2.190e-05   1.638   0.1045    
## sexM        -6.878e-03  1.812e-02  -0.380   0.7050    
## raceCAUC    -2.608e-02  1.767e-02  -1.476   0.1429    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08763 on 104 degrees of freedom
## Multiple R-squared:  0.1277, Adjusted R-squared:  0.08572 
## F-statistic: 3.044 on 5 and 104 DF,  p-value: 0.01323
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_315") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_315") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19794 -0.08223 -0.01875  0.04247  0.42011 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -5.126e-01  1.651e-01  -3.105  0.00301 **
## groupSCZ     6.479e-02  3.456e-02   1.875  0.06615 . 
## age         -2.846e-03  6.612e-03  -0.430  0.66861   
## age2        -1.223e-05  6.733e-05  -0.182  0.85657   
## sexM        -4.859e-02  3.581e-02  -1.357  0.18040   
## raceCAUC    -4.150e-02  3.218e-02  -1.290  0.20261   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1246 on 55 degrees of freedom
## Multiple R-squared:  0.2578, Adjusted R-squared:  0.1903 
## F-statistic: 3.821 on 5 and 55 DF,  p-value: 0.004833
  print("N.B. Lieber_104 has CONTROLS ONLY")
## [1] "N.B. Lieber_104 has CONTROLS ONLY"
  summary(lm(hseq_Endo ~ age +  age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% filter(plate ==   "Lieber_104") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_104") %>% filter(age > 18))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.099286 -0.031401 -0.005334  0.020072  0.164171 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.666e-01  8.519e-02  -8.999 9.36e-10 ***
## age          3.217e-03  3.493e-03   0.921    0.365    
## age2        -4.851e-05  3.414e-05  -1.421    0.166    
## sexM        -2.713e-02  1.886e-02  -1.439    0.161    
## raceCAUC     1.782e-03  1.859e-02   0.096    0.924    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05281 on 28 degrees of freedom
## Multiple R-squared:  0.2877, Adjusted R-squared:  0.186 
## F-statistic: 2.828 on 4 and 28 DF,  p-value: 0.04351
  print("N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY")
## [1] "N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY"
  summary(lm(hseq_Endo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% filter(plate ==   "Lieber_30")))
## 
## Call:
## lm(formula = hseq_Endo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_30"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.108288 -0.044967 -0.001181  0.038219  0.100093 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -5.825e-01  1.551e-01  -3.756  0.00116 **
## age         -6.559e-03  6.718e-03  -0.976  0.34005   
## age2         6.244e-05  7.188e-05   0.869  0.39478   
## sexM        -3.153e-02  2.740e-02  -1.150  0.26291   
## raceCAUC     1.470e-02  2.456e-02   0.598  0.55593   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06063 on 21 degrees of freedom
## Multiple R-squared:  0.1248, Adjusted R-squared:  -0.04195 
## F-statistic: 0.7484 on 4 and 21 DF,  p-value: 0.5701
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_244") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_244") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14123 -0.04329 -0.01221  0.03034  0.23509 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.321e-01  4.517e-02 -13.993   <2e-16 ***
## groupSCZ     2.387e-02  1.123e-02   2.125   0.0350 *  
## age         -2.546e-03  2.198e-03  -1.158   0.2485    
## age2         1.245e-05  2.589e-05   0.481   0.6312    
## sexM        -2.164e-02  1.242e-02  -1.742   0.0834 .  
## raceCAUC    -7.908e-03  1.110e-02  -0.712   0.4772    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06951 on 166 degrees of freedom
## Multiple R-squared:  0.09805,    Adjusted R-squared:  0.07089 
## F-statistic: 3.609 on 5 and 166 DF,  p-value: 0.003991
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race + plate, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315")) %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23279 -0.06005 -0.00848  0.05051  0.43572 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.987e-01  3.593e-02 -16.663  < 2e-16 ***
## groupSCZ         1.981e-02  1.020e-02   1.943   0.0529 .  
## age             -3.141e-03  1.463e-03  -2.147   0.0325 *  
## age2             1.303e-05  1.496e-05   0.872   0.3841    
## sexM            -2.342e-02  1.064e-02  -2.201   0.0284 *  
## raceCAUC        -2.213e-02  9.923e-03  -2.230   0.0264 *  
## plateLieber_289  5.183e-02  1.148e-02   4.513 8.84e-06 ***
## plateLieber_315  3.196e-02  1.351e-02   2.366   0.0186 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08885 on 335 degrees of freedom
## Multiple R-squared:  0.1306, Adjusted R-squared:  0.1124 
## F-statistic: 7.187 on 7 and 335 DF,  p-value: 5.067e-08
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race + plate, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30")) %>% filter(age > 
##         18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23523 -0.05803 -0.00829  0.04808  0.43860 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.989e-01  3.476e-02 -17.227  < 2e-16 ***
## groupSCZ         1.851e-02  9.969e-03   1.856   0.0642 .  
## age             -3.187e-03  1.419e-03  -2.246   0.0253 *  
## age2             1.435e-05  1.453e-05   0.988   0.3241    
## sexM            -2.526e-02  1.007e-02  -2.510   0.0125 *  
## raceCAUC        -1.855e-02  9.356e-03  -1.983   0.0481 *  
## plateLieber_289  5.027e-02  1.123e-02   4.476 1.02e-05 ***
## plateLieber_30  -3.687e-02  1.910e-02  -1.930   0.0544 .  
## plateLieber_315  3.113e-02  1.325e-02   2.350   0.0193 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08726 on 360 degrees of freedom
## Multiple R-squared:  0.1405, Adjusted R-squared:  0.1214 
## F-statistic: 7.354 on 8 and 360 DF,  p-value: 4.53e-09
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race + plate, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30",   "Lieber_104")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")) %>% 
##         filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23618 -0.05589 -0.00780  0.04487  0.43900 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.194e-01  3.574e-02 -17.332  < 2e-16 ***
## groupSCZ         1.824e-02  9.656e-03   1.889  0.05967 .  
## age             -2.827e-03  1.338e-03  -2.113  0.03524 *  
## age2             1.063e-05  1.363e-05   0.780  0.43574    
## sexM            -2.522e-02  9.292e-03  -2.715  0.00693 ** 
## raceCAUC        -1.711e-02  8.693e-03  -1.969  0.04967 *  
## plateLieber_244  1.214e-02  1.739e-02   0.698  0.48531    
## plateLieber_289  6.238e-02  1.757e-02   3.551  0.00043 ***
## plateLieber_30  -2.492e-02  2.473e-02  -1.008  0.31412    
## plateLieber_315  4.290e-02  1.915e-02   2.240  0.02563 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08497 on 392 degrees of freedom
## Multiple R-squared:  0.1566, Adjusted R-squared:  0.1372 
## F-statistic: 8.084 on 9 and 392 DF,  p-value: 5.079e-11
  summary(lm(hseq_Endo ~ group + age + age2 + sex + race + plate, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30",   "Lieber_104")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")) %>% 
##         filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23618 -0.05589 -0.00780  0.04487  0.43900 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.194e-01  3.574e-02 -17.332  < 2e-16 ***
## groupSCZ         1.824e-02  9.656e-03   1.889  0.05967 .  
## age             -2.827e-03  1.338e-03  -2.113  0.03524 *  
## age2             1.063e-05  1.363e-05   0.780  0.43574    
## sexM            -2.522e-02  9.292e-03  -2.715  0.00693 ** 
## raceCAUC        -1.711e-02  8.693e-03  -1.969  0.04967 *  
## plateLieber_244  1.214e-02  1.739e-02   0.698  0.48531    
## plateLieber_289  6.238e-02  1.757e-02   3.551  0.00043 ***
## plateLieber_30  -2.492e-02  2.473e-02  -1.008  0.31412    
## plateLieber_315  4.290e-02  1.915e-02   2.240  0.02563 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08497 on 392 degrees of freedom
## Multiple R-squared:  0.1566, Adjusted R-squared:  0.1372 
## F-statistic: 8.084 on 9 and 392 DF,  p-value: 5.079e-11
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% filter(plate == "Lieber_289") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_289") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.29774 -0.11793  0.01113  0.17466  0.75870 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.923e+00  2.135e-01  -9.008  1.1e-14 ***
## groupSCZ    -2.229e-02  5.915e-02  -0.377    0.707    
## age          2.426e-03  7.755e-03   0.313    0.755    
## age2         2.631e-05  7.049e-05   0.373    0.710    
## sexM        -9.088e-02  5.831e-02  -1.558    0.122    
## raceCAUC    -6.201e-02  5.686e-02  -1.091    0.278    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.282 on 104 degrees of freedom
## Multiple R-squared:  0.1411, Adjusted R-squared:  0.09985 
## F-statistic: 3.418 on 5 and 104 DF,  p-value: 0.00671
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% filter(plate == "Lieber_315") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_315") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39878 -0.14861 -0.03632  0.13132  0.74278 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.701e+00  2.995e-01  -5.679 5.28e-07 ***
## groupSCZ     4.290e-02  6.269e-02   0.684   0.4966    
## age         -2.529e-03  1.199e-02  -0.211   0.8337    
## age2         6.066e-05  1.221e-04   0.497   0.6214    
## sexM         3.086e-02  6.496e-02   0.475   0.6367    
## raceCAUC    -1.202e-01  5.838e-02  -2.059   0.0442 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.226 on 55 degrees of freedom
## Multiple R-squared:  0.1116, Adjusted R-squared:  0.03088 
## F-statistic: 1.382 on 5 and 55 DF,  p-value: 0.2451
  print("N.B. Lieber_104 has CONTROLS ONLY")
## [1] "N.B. Lieber_104 has CONTROLS ONLY"
  summary(lm(hseq_OPC ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_104") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_104") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60333 -0.18295  0.01445  0.16169  0.38582 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.4970450  0.4017262  -3.727  0.00087 ***
## age         -0.0200659  0.0164721  -1.218  0.23332    
## age2         0.0002466  0.0001610   1.532  0.13678    
## sexM        -0.1143812  0.0889396  -1.286  0.20896    
## raceCAUC     0.0729164  0.0876793   0.832  0.41266    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.249 on 28 degrees of freedom
## Multiple R-squared:  0.2589, Adjusted R-squared:  0.153 
## F-statistic: 2.445 on 4 and 28 DF,  p-value: 0.06974
  print("N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY")
## [1] "N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY"
  summary(lm(hseq_OPC ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_30") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_30") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26579 -0.14470 -0.02687  0.06684  0.57730 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.1251470  0.5833985  -3.643  0.00152 **
## age          0.0216504  0.0252754   0.857  0.40135   
## age2        -0.0002815  0.0002704  -1.041  0.30966   
## sexM        -0.0572814  0.1031024  -0.556  0.58437   
## raceCAUC     0.1350859  0.0924041   1.462  0.15857   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2281 on 21 degrees of freedom
## Multiple R-squared:  0.1484, Adjusted R-squared:  -0.01377 
## F-statistic: 0.9151 on 4 and 21 DF,  p-value: 0.4735
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3   %>% filter(plate == "Lieber_244") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_244") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.61335 -0.15036 -0.00634  0.12620  0.86896 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.953e+00  1.786e-01 -10.935  < 2e-16 ***
## groupSCZ     1.157e-01  4.439e-02   2.607  0.00996 ** 
## age          1.376e-03  8.689e-03   0.158  0.87438    
## age2        -4.256e-06  1.023e-04  -0.042  0.96688    
## sexM         8.632e-02  4.911e-02   1.758  0.08064 .  
## raceCAUC     6.204e-03  4.388e-02   0.141  0.88773    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2748 on 166 degrees of freedom
## Multiple R-squared:  0.06739,    Adjusted R-squared:  0.0393 
## F-statistic: 2.399 on 5 and 166 DF,  p-value: 0.03936
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race + plate, data =   hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", "Lieber_244",   "Lieber_315")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315")) %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.36904 -0.14920 -0.00405  0.14398  0.85803 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.834e+00  1.099e-01 -16.684   <2e-16 ***
## groupSCZ         6.383e-02  3.119e-02   2.046   0.0415 *  
## age             -2.233e-03  4.476e-03  -0.499   0.6181    
## age2             5.650e-05  4.575e-05   1.235   0.2177    
## sexM             2.491e-02  3.256e-02   0.765   0.4449    
## raceCAUC        -3.846e-02  3.035e-02  -1.267   0.2061    
## plateLieber_289 -6.579e-02  3.513e-02  -1.873   0.0620 .  
## plateLieber_315  8.248e-02  4.133e-02   1.995   0.0468 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2718 on 335 degrees of freedom
## Multiple R-squared:  0.08266,    Adjusted R-squared:  0.06349 
## F-statistic: 4.312 on 7 and 335 DF,  p-value: 0.0001379
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race + plate, data =   hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", "Lieber_244",   "Lieber_315", "Lieber_30")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30")) %>% filter(age > 
##         18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.36606 -0.14465 -0.00375  0.14039  0.86211 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.829e+00  1.075e-01 -17.021   <2e-16 ***
## groupSCZ         6.450e-02  3.082e-02   2.093   0.0371 *  
## age             -2.220e-03  4.387e-03  -0.506   0.6132    
## age2             5.325e-05  4.492e-05   1.185   0.2366    
## sexM             2.136e-02  3.112e-02   0.687   0.4928    
## raceCAUC        -2.892e-02  2.893e-02  -1.000   0.3181    
## plateLieber_289 -6.511e-02  3.472e-02  -1.875   0.0616 .  
## plateLieber_30  -1.500e-03  5.906e-02  -0.025   0.9798    
## plateLieber_315  8.312e-02  4.097e-02   2.029   0.0432 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2698 on 360 degrees of freedom
## Multiple R-squared:  0.07404,    Adjusted R-squared:  0.05347 
## F-statistic: 3.598 on 8 and 360 DF,  p-value: 0.000489
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race + plate, data =   hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", "Lieber_244",   "Lieber_315", "Lieber_30", "Lieber_104")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")) %>% 
##         filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.36823 -0.14284 -0.00368  0.14881  0.87335 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.837e+00  1.131e-01 -16.246   <2e-16 ***
## groupSCZ         5.941e-02  3.055e-02   1.945   0.0525 .  
## age             -3.853e-03  4.233e-03  -0.910   0.3633    
## age2             7.222e-05  4.312e-05   1.675   0.0947 .  
## sexM             3.992e-03  2.940e-02   0.136   0.8921    
## raceCAUC        -1.636e-02  2.750e-02  -0.595   0.5523    
## plateLieber_244  4.866e-02  5.501e-02   0.884   0.3770    
## plateLieber_289 -2.319e-02  5.558e-02  -0.417   0.6767    
## plateLieber_30   4.948e-02  7.823e-02   0.633   0.5274    
## plateLieber_315  1.293e-01  6.058e-02   2.135   0.0334 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2688 on 392 degrees of freedom
## Multiple R-squared:  0.07767,    Adjusted R-squared:  0.05649 
## F-statistic: 3.668 on 9 and 392 DF,  p-value: 0.0002012
  summary(lm(hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3   %>% filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30",   "Lieber_104")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", 
##         "Lieber_30", "Lieber_104")) %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41759 -0.14155 -0.01273  0.13482  0.87703 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.830e+00  1.039e-01 -17.611   <2e-16 ***
## groupSCZ     6.959e-02  2.804e-02   2.482   0.0135 *  
## age         -2.106e-03  4.245e-03  -0.496   0.6200    
## age2         5.004e-05  4.311e-05   1.161   0.2464    
## sexM         8.083e-03  2.936e-02   0.275   0.7832    
## raceCAUC    -2.586e-02  2.741e-02  -0.943   0.3460    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2719 on 396 degrees of freedom
## Multiple R-squared:  0.04702,    Adjusted R-squared:  0.03498 
## F-statistic: 3.907 on 5 and 396 DF,  p-value: 0.001809
  summary(lm(hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3   %>% filter(plate == "Lieber_289") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_289") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47854 -0.15353 -0.02756  0.10883  0.84348 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.104e+00  1.809e-01   6.106 1.79e-08 ***
## groupSCZ    -1.770e-03  5.012e-02  -0.035   0.9719    
## age          1.362e-02  6.571e-03   2.073   0.0407 *  
## age2        -9.538e-05  5.972e-05  -1.597   0.1133    
## sexM         5.635e-02  4.941e-02   1.141   0.2567    
## raceCAUC    -2.744e-02  4.818e-02  -0.570   0.5701    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2389 on 104 degrees of freedom
## Multiple R-squared:  0.07995,    Adjusted R-squared:  0.03571 
## F-statistic: 1.807 on 5 and 104 DF,  p-value: 0.1178
  summary(lm(hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3   %>% filter(plate == "Lieber_315") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_315") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71344 -0.09082  0.01338  0.17530  0.47906 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.249e+00  3.325e-01   3.758 0.000416 ***
## groupSCZ    -1.406e-01  6.959e-02  -2.021 0.048162 *  
## age          2.956e-03  1.331e-02   0.222 0.825142    
## age2         2.656e-05  1.356e-04   0.196 0.845401    
## sexM        -7.864e-03  7.212e-02  -0.109 0.913565    
## raceCAUC    -2.969e-02  6.481e-02  -0.458 0.648613    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2509 on 55 degrees of freedom
## Multiple R-squared:  0.1294, Adjusted R-squared:  0.05021 
## F-statistic: 1.634 on 5 and 55 DF,  p-value: 0.1663
  print("N.B. Lieber_104 has CONTROLS ONLY")
## [1] "N.B. Lieber_104 has CONTROLS ONLY"
  summary(lm(hseq_Oligo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_104") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_104") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55761 -0.10960  0.02391  0.14218  0.31372 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.7778123  0.3531314   5.034 2.52e-05 ***
## age         -0.0104189  0.0144796  -0.720    0.478    
## age2         0.0001568  0.0001415   1.108    0.277    
## sexM        -0.0204210  0.0781810  -0.261    0.796    
## raceCAUC    -0.0362994  0.0770732  -0.471    0.641    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2189 on 28 degrees of freedom
## Multiple R-squared:  0.2025, Adjusted R-squared:  0.08858 
## F-statistic: 1.778 on 4 and 28 DF,  p-value: 0.1614
  print("N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY")
## [1] "N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY"
  summary(lm(hseq_Oligo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>%   filter(plate == "Lieber_30") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_30") %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31011 -0.15776 -0.00556  0.09364  0.49399 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.3160367  0.5549445   2.371   0.0274 *
## age          0.0131504  0.0240427   0.547   0.5902  
## age2        -0.0001689  0.0002572  -0.657   0.5185  
## sexM         0.0381252  0.0980738   0.389   0.7014  
## raceCAUC     0.1199896  0.0878973   1.365   0.1867  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.217 on 21 degrees of freedom
## Multiple R-squared:  0.1131, Adjusted R-squared:  -0.05579 
## F-statistic: 0.6698 on 4 and 21 DF,  p-value: 0.6202
  summary(lm(hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3   %>% filter(plate == "Lieber_244") %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate == "Lieber_244") %>% filter(age > 18))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7822 -0.1805  0.0129  0.1444  0.8297 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.621e+00  1.666e-01   9.733  < 2e-16 ***
## groupSCZ    -1.286e-01  4.141e-02  -3.106  0.00223 ** 
## age         -2.982e-03  8.106e-03  -0.368  0.71346    
## age2         6.642e-05  9.546e-05   0.696  0.48756    
## sexM         3.617e-02  4.581e-02   0.790  0.43094    
## raceCAUC    -5.958e-02  4.093e-02  -1.456  0.14737    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2563 on 166 degrees of freedom
## Multiple R-squared:  0.07893,    Adjusted R-squared:  0.05119 
## F-statistic: 2.845 on 5 and 166 DF,  p-value: 0.01714
  summary(lm(hseq_Oligo ~ group + age + age2 + sex + race + plate, data =   hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", "Lieber_244",   "Lieber_315")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315")) %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77522 -0.16184  0.00353  0.15216  0.88707 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.446e+00  1.010e-01  14.311  < 2e-16 ***
## groupSCZ        -9.039e-02  2.867e-02  -3.153  0.00176 ** 
## age              4.148e-03  4.114e-03   1.008  0.31398    
## age2            -8.456e-06  4.205e-05  -0.201  0.84075    
## sexM             3.091e-02  2.992e-02   1.033  0.30237    
## raceCAUC        -3.850e-02  2.790e-02  -1.380  0.16848    
## plateLieber_289 -4.348e-02  3.229e-02  -1.347  0.17899    
## plateLieber_315 -2.087e-01  3.799e-02  -5.495 7.74e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2498 on 335 degrees of freedom
## Multiple R-squared:  0.1295, Adjusted R-squared:  0.1113 
## F-statistic: 7.117 on 7 and 335 DF,  p-value: 6.146e-08
  summary(lm(hseq_Oligo ~ group + age + age2 + sex + race + plate, data =   hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", "Lieber_244",   "Lieber_315", "Lieber_30")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30")) %>% filter(age > 
##         18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76944 -0.16424  0.00063  0.14466  0.89559 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.450e+00  9.881e-02  14.674  < 2e-16 ***
## groupSCZ        -8.921e-02  2.833e-02  -3.148  0.00178 ** 
## age              3.940e-03  4.033e-03   0.977  0.32919    
## age2            -8.931e-06  4.130e-05  -0.216  0.82889    
## sexM             3.262e-02  2.861e-02   1.140  0.25500    
## raceCAUC        -2.973e-02  2.659e-02  -1.118  0.26427    
## plateLieber_289 -4.268e-02  3.192e-02  -1.337  0.18206    
## plateLieber_30   8.747e-02  5.430e-02   1.611  0.10807    
## plateLieber_315 -2.076e-01  3.766e-02  -5.512 6.77e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.248 on 360 degrees of freedom
## Multiple R-squared:  0.126,  Adjusted R-squared:  0.1066 
## F-statistic: 6.489 on 8 and 360 DF,  p-value: 6.673e-08
  summary(lm(hseq_Oligo ~ group + age + age2 + sex + race + plate, data =   hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", "Lieber_244",   "Lieber_315", "Lieber_30", "Lieber_104")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race + plate, 
##     data = hseq_ctp_pheno.clr.1e3 %>% filter(plate %in% c("Lieber_289", 
##         "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")) %>% 
##         filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77244 -0.16232  0.00361  0.14571  0.89195 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.513e+00  1.033e-01  14.653  < 2e-16 ***
## groupSCZ        -9.216e-02  2.790e-02  -3.304  0.00104 ** 
## age              2.778e-03  3.866e-03   0.719  0.47283    
## age2             5.933e-06  3.938e-05   0.151  0.88032    
## sexM             2.583e-02  2.685e-02   0.962  0.33666    
## raceCAUC        -2.829e-02  2.512e-02  -1.126  0.26070    
## plateLieber_244 -3.789e-02  5.024e-02  -0.754  0.45121    
## plateLieber_289 -8.467e-02  5.075e-02  -1.668  0.09606 .  
## plateLieber_30   5.098e-02  7.144e-02   0.714  0.47589    
## plateLieber_315 -2.469e-01  5.532e-02  -4.462 1.06e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2455 on 392 degrees of freedom
## Multiple R-squared:  0.1445, Adjusted R-squared:  0.1248 
## F-statistic: 7.355 on 9 and 392 DF,  p-value: 6.295e-10
  summary(lm(hseq_Oligo ~ group + age + age2 +sex + race, data = hseq_ctp_pheno.clr.1e3   %>% filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30",   "Lieber_104")) %>% filter(age > 18)))
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + age2 + sex + race, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(plate %in% c("Lieber_289", "Lieber_244", "Lieber_315", 
##         "Lieber_30", "Lieber_104")) %>% filter(age > 18))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84385 -0.15780 -0.00483  0.15476  0.88333 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.491e+00  9.839e-02  15.159  < 2e-16 ***
## groupSCZ    -8.318e-02  2.654e-02  -3.134  0.00185 ** 
## age          8.014e-04  4.018e-03   0.199  0.84202    
## age2         2.132e-05  4.081e-05   0.522  0.60170    
## sexM         3.071e-02  2.779e-02   1.105  0.26987    
## raceCAUC    -3.623e-02  2.595e-02  -1.397  0.16334    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2574 on 396 degrees of freedom
## Multiple R-squared:  0.05017,    Adjusted R-squared:  0.03818 
## F-statistic: 4.183 on 5 and 396 DF,  p-value: 0.001024
#}

9 Model: per-cell-type, compare ratios between other CTPs and Inh

  • dealing with this directly as a ratio, so no need to do transform (what an alr would do anyway)
hseq_ctp_pheno.tmp.clr0_ratio <- data.frame(hseq_ctp_pheno.tmp.clr0)
hseq_ctp_pheno.tmp.clr0_ratio$exc_inh <- hseq_ctp_pheno.tmp.clr0_ratio$hseq_Exc/hseq_ctp_pheno.tmp.clr0_ratio$hseq_Inh

hseq_ctp_pheno.tmp.clr0_ratio.df <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0_ratio)

summary(lm(exc_inh ~ group, data = hseq_ctp_pheno.tmp.clr0_ratio.df))
## 
## Call:
## lm(formula = exc_inh ~ group, data = hseq_ctp_pheno.tmp.clr0_ratio.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.91058 -0.11371  0.03391  0.15500  0.62441 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.02568    0.01473   69.63  < 2e-16 ***
## groupSCZ     0.08192    0.02354    3.48 0.000547 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2504 on 473 degrees of freedom
## Multiple R-squared:  0.02497,    Adjusted R-squared:  0.02291 
## F-statistic: 12.11 on 1 and 473 DF,  p-value: 0.000547
summary(lm(exc_inh ~ group + age + sex + race, data = hseq_ctp_pheno.tmp.clr0_ratio.df))
## 
## Call:
## lm(formula = exc_inh ~ group + age + sex + race, data = hseq_ctp_pheno.tmp.clr0_ratio.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0013 -0.1140  0.0290  0.1431  0.5535 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.8431804  0.0326782  25.803  < 2e-16 ***
## groupSCZ     0.0052062  0.0236701   0.220    0.826    
## age          0.0049231  0.0005875   8.379 6.21e-16 ***
## sexM        -0.0183000  0.0229913  -0.796    0.426    
## raceCAUC     0.0431809  0.0215801   2.001    0.046 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2334 on 470 degrees of freedom
## Multiple R-squared:  0.1585, Adjusted R-squared:  0.1513 
## F-statistic: 22.12 on 4 and 470 DF,  p-value: < 2.2e-16
summary(lm(exc_inh ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp.clr0_ratio.df))
## 
## Call:
## lm(formula = exc_inh ~ group + age + sex + race + plate, data = hseq_ctp_pheno.tmp.clr0_ratio.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99402 -0.11085  0.03048  0.14630  0.56094 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.8427067  0.0444616  18.954  < 2e-16 ***
## groupSCZ         0.0060711  0.0257110   0.236   0.8134    
## age              0.0047874  0.0006059   7.901 2.02e-14 ***
## sexM            -0.0168530  0.0232764  -0.724   0.4694    
## raceCAUC         0.0407172  0.0216934   1.877   0.0612 .  
## plateLieber_244 -0.0135644  0.0390312  -0.348   0.7284    
## plateLieber_289  0.0044508  0.0417618   0.107   0.9152    
## plateLieber_30   0.0002531  0.0621186   0.004   0.9968    
## plateLieber_315  0.0728140  0.0447303   1.628   0.1042    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2325 on 466 degrees of freedom
## Multiple R-squared:  0.1717, Adjusted R-squared:  0.1575 
## F-statistic: 12.07 on 8 and 466 DF,  p-value: 1.016e-15

10 Model: ALDEx2 for differential proportion

  • Documentation: https://www.bioconductor.org/packages/release/bioc/vignettes/ALDEx2/inst/doc/ALDEx2_vignette.pdf
  • Uses Bayesian sampling from a Dirichlet distribution to estimate underlying technical variation
  • Input count data, takes as given (requires integers, so rounded the back-derived count data)
  • –> infers technical variation (eg. sequencing the same sample again) multiple times using aldex.clr()
    • aldex.clr(): recommend mc.samples >128 for t-test, >1000 for rigorous effect size calculations, >16 for ANOVA
  • Outputs (see p10 of documentation):
    • Tables
      • we*: Welch’s t-test
      • wi*: Wilcoxon rank test
      • *ep: expected p-value
      • *eBH: expected BH-correction
      • rab*: median clr value for a given group
      • dif.btw: median difference in clr value between groups
      • dif.win: median of largest difference in clr values within groups
      • effect: median effect size (diff.btw/max(diff.win))
      • overlap: prop of effect size that overlaps 0 (ie. no effect)
    • Plots
      • relationships between Abundance (clr) vs Difference + Dispersion vs Difference
      • red denotes differentially abundant with q < 0.1, grey abundant but not differentially-abundant, black are rare but not differentially abundant)

10.1 houseman_seq

10.1.1 No covariates

conds <- hseq_ctp_pheno.clr.1e3$group

aldex_in <- sapply(data.frame(t(hseq_ctp_pheno.tmp[,c(level2_cols)])*10000), as.integer)
rownames(aldex_in) <- level2_cols
colnames(aldex_in) <- hseq_ctp_pheno.tmp$IID

x <- aldex.clr(aldex_in, conds, mc.samples=128, denom="all", verbose=F)
## operating in serial mode
## computing center with all features
x.tt <- aldex.ttest(x, paired.test = F)

x.effect <- aldex.effect(x, verbose = F)

x.all <- data.frame(x.tt, x.effect)

tmp <- rownames(x.all)
x.all <- as.data.frame(sapply(x.all, function(x) signif(x, 2)))
rownames(x.all) <- tmp

datatable(x.all)
par(mfrow=c(1,2))
aldex.plot(x.all, type="MA", test="welch", xlab="Log-ratio abundance",ylab="Difference")
aldex.plot(x.all, type="MW", test="welch", xlab="Dispersion",ylab="Difference")

10.1.2 glm + covariates

  • Runs a glm
  • Input model matrix
conds <- hseq_ctp_pheno.tmp$group
cov.aldex <- hseq_ctp_pheno.tmp %>% dplyr::select(c("group", "age", "sex", "race", "plate"))
mm <- model.matrix(~., cov.aldex)

x <- aldex.clr(aldex_in, mm, mc.samples=128, denom="all", verbose=F)

x.glm <- aldex.glm(x, mm)
## Warning in if (verbose) message("running tests for each MC instance:"): the
## condition has length > 1 and only the first element will be used
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## |--
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(25%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(50%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(75%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -|
x.all2 <- data.frame(x.glm)

tmp <- rownames(x.all2)
x.all2 <- as.data.frame(sapply(x.all2, function(x) signif(x, 2)))
rownames(x.all2) <- tmp

datatable(x.all2)

11 Model: ANCOM for differential proportion

11.1 methylCC

11.1.1 Prep

# Prepare data
metadata <- hseq_ctp_pheno.tmp %>% dplyr::select(c("IID", "group", "age", "sex", "race", "plate", "slide"))
rownames(metadata) <- metadata$IID
metadata$group <- as.factor(metadata$group)

ancom_features <- t(hseq_ctp_pheno.tmp[,level2_cols])*10000
colnames(ancom_features) <- hseq_ctp_pheno.tmp$IID
ancom_features <- ancom_features[,which(colnames(ancom_features) %in% metadata$IID)]

# Check variables in the correct order
identical(colnames(ancom_features), metadata$IID)
## [1] TRUE
ancom_table <- feature_table_pre_process(ancom_features, metadata, "IID", group_var = NULL, out_cut = 0.05, zero_cut = 0.90, lib_cut = 10, neg_lb)

11.1.2 Case-control analysis with no covariates

# Run tests with covariates (age + sex + diet)
ancom_out_nocov <- ANCOM(ancom_table$feature_table, ancom_table$meta_data, ancom_table$structure_zeros, main_var = "group", p_adj_method = "BH")

ancom_out_nocov$out <- ancom_out_nocov$out[order(ancom_out_nocov$out$W, decreasing = T),]

datatable(ancom_out_nocov$out)
ancom_out_nocov$fig

11.1.3 Case-control analysis with covariates

# Run tests with covariates (age + sex + diet)
ancom_out_cov <- ANCOM(ancom_table$feature_table, ancom_table$meta_data, ancom_table$structure_zeros, main_var = "group", p_adj_method = "BH", adj_formula = "age + sex + race + plate")

ancom_out_cov$out <- ancom_out_cov$out[order(ancom_out_cov$out$W, decreasing = T),]

datatable(ancom_out_cov$out)
ancom_out_cov$fig

12 Correlation plots

cov_var <- c("IID", "group", "age", "sex", "race", "plate")

12.1 Glial

12.1.1 Aggregated

  • Jaffe: comp_neun_neg
  • eBayes + Houseman (coefs_brain): h_NeuN_neg
  • methylCC (summed): Glial
  • smartsva: sSV1, sSV2
  • eBayes + Houseman (summed coefs_sub): h_Glial
    • include methylCC oligo since it seems to go together …
neunn <- ctp_pheno %>% dplyr::select(c(all_of(cov_var), "h_NeuN_neg", "hseq_Glial", "mcc_Glial", "sSV1", "sSV2", "h_Glial", "comp_neun_pos", "hseq_Oligo", "hseq_OPC", "mcc_Oligo", "mcc_OPC", "h_OLIGO", "h_OPC", "celfie_Glial", "celfie_Oligo", "celfie_OPC", "VAEe3", "VAEe9"))

# y = h_NeuN_neg
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "h_NeuN_neg"), measure.var = c("hseq_Glial", "mcc_Glial", "h_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=h_NeuN_neg)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: hseq_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "hseq_Glial"), measure.var = c("h_NeuN_neg", "mcc_Glial", "h_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=hseq_Glial)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: mcc_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "mcc_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "h_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=mcc_Glial)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: h_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "h_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "celfie_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=h_Glial)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: celfie_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "celfie_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "h_Glial", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=celfie_Glial)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV1
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "sSV1"), measure.var = c("h_NeuN_neg", "hseq_Oligo", "mcc_Oligo", "h_OLIGO", "celfie_Oligo", "VAEe9"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=sSV1)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV1
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "sSV1"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "h_Glial", "celfie_Glial", "hseq_Oligo", "hseq_OPC", "mcc_Oligo", "mcc_OPC", "h_OLIGO", "h_OPC", "celfie_Oligo", "celfie_OPC", "VAEe9"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=sSV1)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: VAEe9
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "VAEe9"), measure.var = c("h_NeuN_neg", "hseq_Oligo", "mcc_Oligo", "h_OLIGO", "celfie_Oligo", "sSV1"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=VAEe9)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

12.1.2 Compare individual glial sub-type CTPs

# Comparison between houseman extremes vs eBayes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman extremes vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman extremes vs CelFIE
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "celfie_Glial", "celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman array vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC", "h_NeuN_neg", "mcc_Glial", "mcc_Astro", "mcc_Endo", "mcc_Micro", "mcc_Oligo", "mcc_OPC")])

ggpairs(ctp_pheno_sub_glial)

12.2 Neuronal

12.2.1 Aggregated

# Select columns
neunp <- ctp_pheno %>% dplyr::select(c(all_of(cov_var), "h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "comp_neun_pos", "sSV1", "VAEe3"))

# y: h_NeuN_pos
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "h_NeuN_pos"), measure.var = c("hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "comp_neun_pos", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=h_NeuN_pos)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: hseq_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "hseq_Neuron"), measure.var = c("h_NeuN_pos", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "comp_neun_pos", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=hseq_Neuron)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: mcc_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "mcc_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "h_Neuron", "celfie_Neuron", "comp_neun_pos", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=mcc_Neuron)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: h_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "h_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "celfie_Neuron", "comp_neun_pos", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=h_Neuron)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: celfie_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "celfie_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "comp_neun_pos", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=celfie_Neuron)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: comp_neun_pos
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "comp_neun_pos"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV1", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=comp_neun_pos)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV1
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "sSV1"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "comp_neun_pos", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=sSV1)) + geom_point(aes(colour = plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

12.2.2 Compare individual glial sub-type CTPs

if (cellnum == 7) {
  ctp_pheno_sub_neuron <- data.frame(ctp_pheno[,c("hseq_Neuron", "hseq_Exc", "hseq_Inh", "h_Neuron", "h_GLU", "h_GABA", "mcc_Neuron", "mcc_Exc", "mcc_Inh", "h_NeuN_pos")])

} else if (cellnum == 9) {
  ctp_pheno_sub_neuron <- data.frame(ctp_pheno[,c("Neuron", "Exc_upper", "Exc_lower", "Inh_MGE", "Inh_CGE", "h_NeuN_pos", "h_Neuron", "h_GABA", "h_GLU")])
}

ggpairs(ctp_pheno_sub_neuron)

12.3 All together

ctp_pheno_sub <- data.frame(ctp_pheno[,c("hseq_Exc", "hseq_Inh", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

#svg(paste(out_dir, "/", filen, "_ctp_hseq_harray_cor_matrix.svg", sep = ""), height = 6, width = 6)
jpeg(paste(out_dir, "/", filen, "_ctp_hseq_harray_cor_matrix.jpeg", sep = ""), height = 600, width = 600, quality = 100)
g <- ggduo(ctp_pheno_sub, colnames(ctp_pheno_sub)[1:7], colnames(ctp_pheno_sub)[8:14], xlab = "Houseman + sequencing reference", ylab = "Houseman + array reference", title = "LIBD")
print(g)
## plot: [1,1] [>-------------------------------------------------] 2% est: 0s
## plot: [1,2] [=>------------------------------------------------] 4% est: 3s
## plot: [1,3] [==>-----------------------------------------------] 6% est: 4s
## plot: [1,4] [===>----------------------------------------------] 8% est: 6s
## plot: [1,5] [====>---------------------------------------------] 10% est: 6s
## plot: [1,6] [=====>--------------------------------------------] 12% est: 5s
## plot: [1,7] [======>-------------------------------------------] 14% est: 5s
## plot: [2,1] [=======>------------------------------------------] 16% est: 5s
## plot: [2,2] [========>-----------------------------------------] 18% est: 5s
## plot: [2,3] [=========>----------------------------------------] 20% est: 5s
## plot: [2,4] [==========>---------------------------------------] 22% est: 5s
## plot: [2,5] [===========>--------------------------------------] 24% est: 5s
## plot: [2,6] [============>-------------------------------------] 27% est: 4s
## plot: [2,7] [=============>------------------------------------] 29% est: 4s
## plot: [3,1] [==============>-----------------------------------] 31% est: 4s
## plot: [3,2] [===============>----------------------------------] 33% est: 4s
## plot: [3,3] [================>---------------------------------] 35% est: 4s
## plot: [3,4] [=================>--------------------------------] 37% est: 4s
## plot: [3,5] [==================>-------------------------------] 39% est: 4s
## plot: [3,6] [===================>------------------------------] 41% est: 4s
## plot: [3,7] [====================>-----------------------------] 43% est: 3s
## plot: [4,1] [=====================>----------------------------] 45% est: 3s
## plot: [4,2] [======================>---------------------------] 47% est: 3s
## plot: [4,3] [=======================>--------------------------] 49% est: 3s
## plot: [4,4] [=========================>------------------------] 51% est: 3s
## plot: [4,5] [==========================>-----------------------] 53% est: 3s
## plot: [4,6] [===========================>----------------------] 55% est: 3s
## plot: [4,7] [============================>---------------------] 57% est: 3s
## plot: [5,1] [=============================>--------------------] 59% est: 2s
## plot: [5,2] [==============================>-------------------] 61% est: 2s
## plot: [5,3] [===============================>------------------] 63% est: 2s
## plot: [5,4] [================================>-----------------] 65% est: 2s
## plot: [5,5] [=================================>----------------] 67% est: 2s
## plot: [5,6] [==================================>---------------] 69% est: 2s
## plot: [5,7] [===================================>--------------] 71% est: 2s
## plot: [6,1] [====================================>-------------] 73% est: 2s
## plot: [6,2] [=====================================>------------] 76% est: 1s
## plot: [6,3] [======================================>-----------] 78% est: 1s
## plot: [6,4] [=======================================>----------] 80% est: 1s
## plot: [6,5] [========================================>---------] 82% est: 1s
## plot: [6,6] [=========================================>--------] 84% est: 1s
## plot: [6,7] [==========================================>-------] 86% est: 1s
## plot: [7,1] [===========================================>------] 88% est: 1s
## plot: [7,2] [============================================>-----] 90% est: 1s
## plot: [7,3] [=============================================>----] 92% est: 0s
## plot: [7,4] [==============================================>---] 94% est: 0s
## plot: [7,5] [===============================================>--] 96% est: 0s
## plot: [7,6] [================================================>-] 98% est: 0s
## plot: [7,7] [==================================================]100% est: 0s
dev.off()
## png 
##   2

12.4 Unsupervised comparison

12.4.1 Neurons

if (cellnum == 7) {
  ctp_pheno_sv_neuron <- data.frame(ctp_pheno[,c("h_NeuN_pos", "hseq_Neuron", "hseq_Exc", "hseq_Inh", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])
} else if (cellnum == 9) {
  ctp_pheno_sv_neuron <- data.frame(ctp_pheno[,c("h_NeuN_pos", "Neuron", "Exc_upper", "Exc_lower", "Inh_MGE", "Inh_CGE", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])
}

ggpairs(ctp_pheno_sv_neuron)

12.4.2 Glial

ctp_pheno_sv_glial <- data.frame(ctp_pheno[,c("h_NeuN_neg", "hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])

ggpairs(ctp_pheno_sv_glial)